##
## Main analyses.
##
## Author: 
## Soubhik Barari
##
## Environment:
## * R 4+
## * 15-30 GB of RAM
##
## Runtime: 
## * 15-20 minutes to load all data and re-run analyses
## * 30 minutes to load all data and re-run analyses
##

# 0.) PRE-AMBLE ---------------------------------------------------------------

## Project utilities (presumably also attaches the packages used below --
## TODO confirm against util.R).
source("util.R")

## When running `source('main.R')`, load data and then stop
RUN_INTERACTIVE <- TRUE

## Each cached .RData bundle is loaded only when its sentinel object is not
## already in the workspace, so re-sourcing in a live session skips slow reads.
## The status line after each step is printed whether or not a load happened.
load.t0 <- Sys.time()

if (!is.element("Brands", ls())) {
  load("corpspeak_basics.RData")
}
cat("brands✓\n")

if (!is.element("Brand_Msgs", ls())) {
  load("corpspeak_text.RData")
}
cat("text✓\n")

if (!is.element("Brand_Msgs_Q", ls())) {
  load("corpspeak_text_dfms.RData")
}
cat("text DFMs✓\n")

if (!is.element("scaled_text", ls())) {
  load("corpspeak_scaled_text.RData")
}
cat("ideal points✓\n")

if (!is.element("scaled_contrib_opsec", ls())) {
  load("corpspeak_scaled_opsec.RData")
}
cat("donations (OpenSecrets)✓\n")

if (!is.element("scaled_contrib_FEC", ls())) {
  load("corpspeak_scaled_FEC.RData")
}
cat("donations (FEC)✓\n")

if (!is.element("scaled_contrib_stuckatz2021", ls())) {
  load("corpspeak_scaled_stkz21.RData")
}
cat("donations (Stuckatz 2021)✓\n")

if (!is.element("scaled_legis", ls())) {
  load("corpspeak_scaled_legis.RData")
}
cat("lobbying✓\n")

if (!is.element("scaled_foll", ls())) {
  load("corpspeak_scaled_followers.RData")
}
cat("followers✓\n")

## Report the elapsed load time (~10 min) and drop the timers.
load.t1 <- Sys.time()
print(load.t1 - load.t0)
rm(load.t0, load.t1)

## If Brands carries both `State.x` and `State.y` (the suffixes look like
## leftovers from an earlier join -- TODO confirm), collapse them into a single
## `State` column, taking `State.x` first and falling back to `State.y`.
## `&&` is the scalar, short-circuiting operator appropriate for an `if` test.
if ("State.x" %in% colnames(Brands) && "State.y" %in% colnames(Brands)) {
  Brands <- Brands %>%
    mutate(State = coalesce(State.x, State.y)) %>%
    select(-State.x, -State.y)
}

## Top partisan bigrams by type, read from a pipe-delimited file; rows with a
## missing `feature` are dropped. Update the file manually as needed.
partisan_bigrams_bytype <- filter(
  read_delim("data_interim/brand_partisan_bigrams_top_type.txt", delim = "|"),
  !is.na(feature)
)

## Top partisan hashtags by type, same layout and handling as the bigrams.
## Update the file manually as needed.
partisan_hashtags_bytype <- filter(
  read_delim("data_interim/brand_partisan_hashtags_top_type.txt", delim = "|"),
  !is.na(feature)
)

## Minimum number of brands an industry needs to be included in
## industry-level analyses
INDUSTRY_MIN_BRANDS <- 5

## Remove any brands not categorized into an industry?
REMOVE_NA_INDUSTRIES <- FALSE

## Minimum number of partisan bigrams a brand needs to be included in
## brand-level analyses
MIN_PARTISAN_BIGRAMS <- 5

## Minimum number of occurrences for a bigram to be included in
## text-level analyses
MIN_BIGRAM_COUNT <- 5

## Default plot theme: theme_bw() with black facet strips and bold white
## 12pt strip labels.
theme_custom <- theme_bw() +
  theme(
    strip.background = element_rect(fill = "black"),
    strip.text = element_text(color = "white", face = "bold", size = 12)
  )

## Variant for vertically stacked panels: 10pt strip labels, with the
## y-strip text rotated to read horizontally (angle = 0).
theme_custom_vertpanel <- theme_bw() +
  theme(
    strip.background = element_rect(fill = "black"),
    strip.text.x = element_text(color = "white", face = "bold", size = 10),
    strip.text.y = element_text(color = "white", face = "bold", size = 10, angle = 0)
  )

# 1.) PREPARE DATA ------------------------------------------------------------

## 1.1. Unit: Brands -----------------------------------------------------------

### Check coverage ----
## How many brands appear in the lobbying data?
table(Brands$yougov_name %in% scaled_legis$brands_nonpara$yougov_name)

## ... in the Stuckatz (2021) contribution data?
table(Brands$yougov_name %in% scaled_contrib_stuckatz2021$brands_nonpara$yougov_name)

## ... in the FEC contribution data?
table(Brands$yougov_name %in% scaled_contrib_FEC$brands_nonpara$yougov_name)

## ... in either contribution source?
table(
  Brands$yougov_name %in% scaled_contrib_stuckatz2021$brands_nonpara$yougov_name |
    Brands$yougov_name %in% scaled_contrib_FEC$brands_nonpara$yougov_name
)

### Merge covariates ----
message("Merging Covariates:")

## Follower metrics from social-listening, one row per brand / account / month
## (layout inferred from the columns used below -- TODO confirm).
sl_brands_followers <- readRDS("data_interim/social-listening_brands_followers.rds")

## Average every remaining column within brand x month (ignoring NAs), then
## spread to one row per brand with columns named `<metric>.<Year_Month>`.
sl_brands_followers.wide <- sl_brands_followers %>%
  select(-sl_name) %>%
  group_by(yougov_name, sl_Year_Month) %>%
  summarise_all(~ mean(.x, na.rm = TRUE)) %>%
  pivot_wider(
    names_from = sl_Year_Month,
    values_from = -c(yougov_name, sl_Year_Month),
    names_sep = "."
  ) %>%
  ungroup()

## Build the brand-level analysis frame: keep the 1,000 brands with the highest
## `yougov_pct_recognition`, then left-join each covariate source by
## `yougov_name`. Before every join, columns already carrying that source's
## prefix are dropped so stale copies on `Brands` cannot collide; after every
## join, a check asserts that all rows are still distinct.
d.brands <- Brands %>%
  arrange(desc(yougov_pct_recognition)) %>%
  head(1000) %T>%

  { message("++ Firm Lobbying") } %>%
  select(-starts_with("legis")) %>%
  left_join(scaled_legis$brands_nonpara %>%
              group_by(yougov_name) %>%
              summarise(legis.D_frac = mean(D_frac, na.rm = TRUE),
                        legis.n = sum(n_legis),
                        legis.dwnom1_mean = mean(Nominate_dim1_mean, na.rm = TRUE),
                        legis.dwnom1_sd = mean(Nominate_dim1_sd, na.rm = TRUE)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%

  { message("++ Employee Contributions (FEC)") } %>%
  select(-starts_with("FEC")) %>%
  left_join(scaled_contrib_FEC$brands_nonpara %>%
              rename_at(vars(-starts_with("yougov_name")), ~paste0("FEC.", .x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%

  { message("++ Employee Contributions (OpenSecrets)") } %>%
  select(-starts_with("opsec")) %>%
  left_join(scaled_contrib_opsec$brands_nonpara %>%
              rename_at(vars(-starts_with("yougov_name")), ~paste0("opsec.", .x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%

  { message("++ Employee Contributions (Stuckatz 2021)") } %>%
  select(-starts_with("stkz21")) %>%
  left_join(scaled_contrib_stuckatz2021$brands_nonpara %>%
              rename_at(vars(-starts_with("yougov_name")), ~paste0("stkz21.", .x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%

  { message("++ Twitter Followers (scaled via MCs)") } %>%
  select(-starts_with("twitter.foll_MC")) %>%
  left_join(scaled_foll$brands_foll_scaled_MC_df %>%
              select(yougov_name, ### brand followers scaled only using MCs' accounts
                     twitter.foll_MC_slant = foll_MC_slant,
                     twitter.foll_MC_slant_wtd = foll_MC_slant_wtd,
                     twitter.foll_MC_accts_n = n_MC_accts),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%

  { message("++ Twitter Followers (scaled via many accts)") } %>%
  select(-starts_with("twitter.foll_ideo")) %>%
  left_join(scaled_foll$brands_foll_scaled_ideo_df %>%
              select(yougov_name, ### brand followers scaled using many actors
                     twitter.foll_ideo_slant = foll_ideo_slant,
                     twitter.foll_ideo_slant_wtd = foll_ideo_slant_wtd,
                     twitter.foll_ideo_accts_n = n_ideo_accts),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%

  { message("++ Twitter Followers (social-listening.org)") } %>%
  select(-starts_with("sl")) %>%
  ## The column selection must be piped onto the right-hand table. Previously
  ## it was separated by a comma, which handed it to left_join() as the `copy`
  ## argument; that argument is never evaluated when both inputs are local
  ## data frames, so the selection silently did nothing.
  left_join(sl_brands_followers.wide %>%
              rename_at(vars(starts_with("sl")), ~gsub("sl_", "sl.", .x)) %>%
              rename_at(vars(starts_with("sl")), ~gsub("-", "_", .x)) %>%
              select(yougov_name, starts_with("sl")),
            by = "yougov_name")

## Harmonize Republican donation shares across sources.
##  * stkz21.don     : sum of the four Stuckatz (2021) donation columns.
##  * stkz21.R_share : donation-weighted average of the four group R shares.
##  * R_don_share*   : FEC value first, falling back to Stuckatz (and, for the
##                     overall share only, OpenSecrets) where FEC is missing.
d.brands <- d.brands %>%
  select(-starts_with("R_don_share")) %>%
  mutate(
    stkz21.don = stkz21.don_govt + stkz21.don_exec + stkz21.don_prmkt + stkz21.don_rnf,
    stkz21.R_share = (
      (stkz21.don_govt * stkz21.R_share_govt) +
        (stkz21.don_exec * stkz21.R_share_exec) +
        (stkz21.don_prmkt * stkz21.R_share_prmkt) +
        (stkz21.don_rnf * stkz21.R_share_rnf)
    ) / stkz21.don
  ) %>%
  mutate(
    `R_don_share` = coalesce(FEC.R_don_share, stkz21.R_share, opsec.R_share.indiv_dollars),
    `R_don_share.Board_Member` = FEC.R_don_share.Board_Member,
    `R_don_share.Managers` = FEC.R_don_share.Managers,
    `R_don_share.Legal` = FEC.R_don_share.Legal,
    `R_don_share.Human_Resources` = FEC.R_don_share.Human_Resources,
    `R_don_share.Top_Exec` = coalesce(FEC.R_don_share.Top_Exec, stkz21.R_share_exec),
    `R_don_share.Public_Relations` = coalesce(FEC.R_don_share.Public_Relations, stkz21.R_share_govt),
    `R_don_share.Marketing` = coalesce(FEC.R_don_share.Marketing, stkz21.R_share_prmkt),
    `R_don_share.Rank_and_File` = coalesce(FEC.R_don_share.Rank_and_File, stkz21.R_share_rnf)
  )

## Derived brand covariates:
##  * legis.R_frac            : complement of the Democratic lobbying fraction.
##  * yougov_aud.Income.*     : sixteen fine income brackets summed into four.
##  * yougov_aud.gender.*     : percent signs stripped, then cast to numeric.
d.brands <- d.brands %>%
  mutate(
    legis.R_frac = 1 - legis.D_frac,

    `yougov_aud.Income.<$40k` =
      `yougov_aud.income.<10000` +
      `yougov_aud.income.10000-19999` +
      `yougov_aud.income.20000-29999` +
      `yougov_aud.income.30000-39999`,
    `yougov_aud.Income.$40-80k` =
      `yougov_aud.income.40000-49999` +
      `yougov_aud.income.50000-59999` +
      `yougov_aud.income.60000-69999` +
      `yougov_aud.income.70000-79999`,
    `yougov_aud.Income.$80-200k` =
      `yougov_aud.income.80000-99999` +
      `yougov_aud.income.100000-119999` +
      `yougov_aud.income.120000-149999` +
      `yougov_aud.income.150000-199999`,
    `yougov_aud.Income.>$200k` =
      `yougov_aud.income.200000-249999` +
      `yougov_aud.income.250000-349999` +
      `yougov_aud.income.350000-499999` +
      `yougov_aud.income.500000+`,

    yougov_aud.gender.Female = as.numeric(gsub("\\%", "", yougov_aud.gender.Female)),
    yougov_aud.gender.Male = as.numeric(gsub("\\%", "", yougov_aud.gender.Male))
  )

## Parse Glassdoor's free-text revenue into dollars: take the 1-3 digit number
## directly preceding "billion" and/or "million" (an optional "+" is allowed),
## treat a missing component as 0, add the two, and set a zero total to NA.
d.brands <- d.brands %>%
  mutate(
    glassdoor_rev.bil = glassdoor_rev %>%
      str_extract("(\\d{1,3})\\+? billion") %>%
      str_extract("\\d{1,3}") %>%
      as.numeric(),
    glassdoor_rev.mil = glassdoor_rev %>%
      str_extract("(\\d{1,3})\\+? million") %>%
      str_extract("\\d{1,3}") %>%
      as.numeric(),
    glassdoor_rev.bil = ifelse(is.na(glassdoor_rev.bil), 0, glassdoor_rev.bil),
    glassdoor_rev.mil = ifelse(is.na(glassdoor_rev.mil), 0, glassdoor_rev.mil),
    glassdoor_rev_usd = glassdoor_rev.bil * 1e+09 + glassdoor_rev.mil * 1e+06,
    glassdoor_rev_usd = ifelse(glassdoor_rev_usd == 0, NA, glassdoor_rev_usd)
  ) %>%
  select(-c(glassdoor_rev.bil, glassdoor_rev.mil))

## Employee counts: the digits directly preceding "Employees" in Glassdoor's
## size string, and the Orbis count cast to numeric.
d.brands <- d.brands %>%
  mutate(
    glassdoor_num_empl = glassdoor_size %>%
      str_extract("(\\d*)\\+? Employees") %>%
      str_extract("\\d*") %>%
      as.numeric(),
    orbis_num_empl = as.numeric(orbis_num_empl)
  )

## Every retained brand must have a YouGov brand category.
stopifnot(sum(is.na(d.brands$yougov_brand_category)) == 0)

## Count of brands with a non-missing `tw_account` or `ig_account`.
sum(!(is.na(d.brands$tw_account) & is.na(d.brands$ig_account)))

## Disabled interactive helpers: list the categories, and any brands that
## lack one.
if (FALSE) {
  unique(d.brands$yougov_brand_category)

  d.brands %>%
    filter(is.na(yougov_brand_category)) %>%
    select(yougov_name)
}

### Merge location data ----
## SafeGraph locations: one input row per brand location with vote shares.
## Every `Pres.DEM*` column is flipped to 1 - share, so from that point on the
## columns hold the complement of the Democratic share despite their names.
## Per location, the available elections are averaged at each geography and
## the first non-missing of zip, state, county is kept; locations are then
## averaged within brand.
d.locs.sg <- readRDS("data_interim/safegraph_brand_locs_votes.rds") %>%
  rowwise() %>% #### get best estimate of partisan geography
  mutate_at(.vars = vars(starts_with("Pres.DEM")), .funs = ~ 1 - .x) %>%
  mutate(
    pres.REP.stfips = mean(
      c(Pres.DEM.08.stfips, Pres.DEM.12.stfips, Pres.DEM.16.stfips),
      na.rm = TRUE
    ),
    pres.REP.zip = mean(
      c(Pres.DEM.08.zipcode, Pres.DEM.12.zipcode, Pres.DEM.16.zipcode),
      na.rm = TRUE
    ),
    pres.REP.county = mean(
      c(Pres.REP.12.county, Pres.REP.16.county, Pres.REP.20.county),
      na.rm = TRUE
    ),
    pres.REP = coalesce(pres.REP.zip, pres.REP.stfips, pres.REP.county)
  ) %>%
  group_by(yougov_name) %>%
  summarise(
    sg.pres.REP = mean(pres.REP, na.rm = TRUE),
    sg.pres.REP.county = mean(pres.REP.county, na.rm = TRUE),
    sg.n.locs = n(),
    .groups = "drop"
  )
message("++ SafeGraph data")

## Zippia locations, processed like the SafeGraph ones: flip `Pres.DEM*`
## columns to 1 - share, average the available elections per geography, and
## keep the first non-missing of zip, state, county for each location.
d.locs.zi <- readRDS("data_interim/zippia_brand_locs_votes.rds")
d.locs.zi <- d.locs.zi %>%
  rowwise() %>% #### get best estimate of partisan geography
  mutate_at(.vars = vars(starts_with("Pres.DEM")), .funs = ~1-.x) %>%
  mutate(pres.REP.stfips = mean(c(Pres.DEM.08.stfips, Pres.DEM.12.stfips, Pres.DEM.16.stfips), na.rm = TRUE)) %>%
  mutate(pres.REP.zip = mean(c(Pres.DEM.08.zipcode, Pres.DEM.12.zipcode, Pres.DEM.16.zipcode), na.rm = TRUE)) %>%
  mutate(pres.REP.county = mean(c(Pres.REP.12.county, Pres.REP.16.county, Pres.REP.20.county), na.rm = TRUE)) %>%
  mutate(zi.pres.REP = coalesce(pres.REP.zip, pres.REP.stfips, pres.REP.county))

## Match locations back to brands: fill `yougov_name` via the Zippia URL (with
## a trailing slash appended), then the manufacturer, then the parent.
## `coalesce_join` is defined outside this file section.
d.locs.zi <- d.locs.zi %>% #### match back to brands
  mutate(zippia_url = ifelse(!is.na(url), paste0(url,"/"), url)) %>%
  distinct(manufacturer, parent, yougov_name, zi.pres.REP, pres.REP.county, zippia_url, Location.ST, Location.Name) %>%
  coalesce_join(distinct(Brands, yougov_name, zippia_url) %>%
                  filter(!is.na(zippia_url)), by = c("zippia_url"), join = dplyr::left_join) %>%
  coalesce_join(distinct(Brands, yougov_name, manufacturer) %>%
                  filter(!is.na(manufacturer)), by = c("manufacturer"), join = dplyr::left_join) %>%
  coalesce_join(distinct(Brands, yougov_name, parent) %>%
                  filter(!is.na(parent)), by = c("parent"), join = dplyr::left_join)

## Average locations within brand. `na.rm = TRUE` is applied to both means,
## matching the SafeGraph aggregation: without it, a single location lacking
## vote data turned the whole brand's `zi.pres.REP` into NA/NaN.
d.locs.zi <- d.locs.zi %>%
  group_by(yougov_name) %>%
  summarise(zi.pres.REP = mean(zi.pres.REP, na.rm = TRUE),
            zi.pres.REP.county = mean(pres.REP.county, na.rm = TRUE),
            zi.n.locs = n(),
            .groups = "drop")
message("++ Zippia data")

## Attach the SafeGraph (`sg.`) and Zippia (`zi.`) location summaries, first
## dropping any columns that already carry those prefixes.
d.brands <- d.brands %>%
  select(-starts_with("sg.")) %>%
  select(-starts_with("zi.")) %>%
  left_join(d.locs.sg, by = "yougov_name") %>%
  left_join(d.locs.zi, by = "yougov_name")

## The joins must not have produced fully duplicated rows.
stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

### Merge ideal points ----
## Attach every variant of the text-based ideal points to `d.brands`.
## Existing `ideal*` columns are dropped first. Each variant is left-joined by
## `yougov_name` and followed by a check that all rows are still distinct.
## Non-parametric variants get a 95% interval (estimate +/- 1.96 * SE) and are
## renamed with a variant-specific prefix (`ideal.main`, `ideal.tw`, ...).
## `scaled_text_alt` and `orient_idealpts_brands` are defined outside this
## file section.
message("Merging Ideal Points:")

d.brands <- d.brands %>%
  select(-starts_with("ideal")) %T>%
  
  { message("++ Non-parametric") } %>%
  ## main estimate: `idealbin*` -> `ideal.bin*`, then every "ideal" -> "ideal.main"
  left_join(scaled_text$brands_bigrams_nonpara %>%
              orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.main",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Twitter only") } %>%
  left_join(scaled_text_alt$Twitter_only$brands_bigrams_nonpara %>%
              orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.tw",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Instagram only") } %>%
  left_join(scaled_text_alt$Instagram_only$brands_bigrams_nonpara %>%
              orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.ig",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Stances only") } %>%
  left_join(scaled_text_alt$stances_only$brands_bigrams_nonpara %>%
              orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.stances",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Issues only") } %>%
  ## unlike the variants above, this one is joined without re-orienting
  left_join(scaled_text_alt$issues_only$brands_bigrams_nonpara %>%
              # orient_idealpts_brands(ideal_col = "ideal", scale = F, anchor_brand_rgx = "Shell", anchor_sign = 1) %>% ## doesn't need re-orienting
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.issues",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Groups only") } %>%
  ## also joined without re-orienting
  left_join(scaled_text_alt$group_only$brands_bigrams_nonpara %>%
              # orient_idealpts_brands(ideal_col = "ideal", scale = F, anchor_brand_rgx = "Shell", anchor_sign = 1) %>% ## doesn't need re-orienting
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.groups",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Parametric model") } %>%
  ## point estimates: `slant` -> `ideal.mdl`, `intercept` -> `ideal.mdl.intercept`.
  ## NOTE(review): rename() keeps any other columns of the source table, whereas
  ## the by-category join below uses select() -- confirm this is intended.
  left_join(scaled_text_alt$par_boot_mdl$brands_bigrams_pois_point %>%
              orient_idealpts_brands(ideal_col = "slant", scale = F) %>%
              rename(ideal.mdl=slant, ideal.mdl.intercept=intercept),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Parametric model (by category)") } %>%
  left_join(scaled_text_alt$par_boot_mdl$brands_bycategory_bigrams_pois_point %>%
              orient_idealpts_brands(ideal_col = "slant", scale = F) %>%
              select(yougov_name, ideal.mdl.bycat=slant, ideal.mdl.bycat.intercept=intercept),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Parametric model (bootstrapped)") } %>%
  ## per brand: median as the estimate, 2.5% / 97.5% quantiles as the interval;
  ## the final rename turns the `ideal.mdl` prefix into `ideal.mdl.boot`
  left_join(scaled_text_alt$par_boot_mdl$brands_bigrams_pois_boots %>%
              orient_idealpts_brands(ideal_col = "slant", scale = F) %>%
              group_by(yougov_name) %>%
              summarise(intercept.ci.95.upr  = quantile(intercept, probs=0.975),
                        intercept.ci.95.lwr  = quantile(intercept, probs=0.025),
                        intercept            = quantile(intercept, probs=0.5),
                        slant.ci.95.upr      = quantile(slant, probs=0.975),
                        slant.ci.95.lwr      = quantile(slant, probs=0.025),
                        slant                = quantile(slant, probs=0.5),
                        .groups = "drop") %>%
              rename(ideal.mdl=slant,
                     ideal.mdl.ci.95.upr=slant.ci.95.upr,
                     ideal.mdl.ci.95.lwr=slant.ci.95.lwr,
                     ideal.mdl.intercept=intercept,
                     ideal.mdl.intercept.ci.95.upr=intercept.ci.95.upr,
                     ideal.mdl.intercept.ci.95.lwr=intercept.ci.95.lwr) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal.mdl","ideal.mdl.boot",.x)),
            by = "yougov_name") %T>%
  with(., stopifnot(nrow(.) == nrow(distinct(.)))) %T>%
  
  { message("++ Parametric model (bootstrapped + by category)") } %>%
  ## same summary as above; prefix becomes `ideal.mdl.bycat.boot`
  left_join(scaled_text_alt$par_boot_mdl$brands_bycategory_bigrams_pois_boots %>%
              orient_idealpts_brands(ideal_col = "slant", scale = F) %>%
              group_by(yougov_name) %>%
              summarise(intercept.ci.95.upr = quantile(intercept, probs=0.975),
                        intercept.ci.95.lwr = quantile(intercept, probs=0.025),
                        intercept           = quantile(intercept, probs=0.5),
                        slant.ci.95.upr     = quantile(slant, probs=0.975),
                        slant.ci.95.lwr     = quantile(slant, probs=0.025),
                        slant               = quantile(slant, probs=0.5),
                        .groups = "drop") %>%
              select(yougov_name,
                     ideal.mdl=slant, 
                     ideal.mdl.ci.95.upr=slant.ci.95.upr,
                     ideal.mdl.ci.95.lwr=slant.ci.95.lwr,
                     ideal.mdl.intercept=intercept,
                     ideal.mdl.intercept.ci.95.upr=intercept.ci.95.upr,
                     ideal.mdl.intercept.ci.95.lwr=intercept.ci.95.lwr) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal.mdl","ideal.mdl.bycat.boot",.x)),
            by = "yougov_name")

## Disabled interactive spot checks of the merged ideal points.
if (FALSE) {

  ## spot check non-parametric ideal points
  d.brands %>%
    arrange(desc(ideal.main)) %>%
    filter(!is.na(ideal.main)) %>%
    mutate(yougov_name = as_factor(yougov_name)) %>%
    ggplot(aes(
      y = yougov_name,
      x = ideal.main,
      xmin = ideal.main.ci.95.lwr,
      xmax = ideal.main.ci.95.upr
    )) +
    geom_vline(xintercept = 0) +
    geom_pointrange(alpha = 0.5) +
    theme_bw()

  ## spot check bootstrapped parametric ideal points
  d.brands %>%
    arrange(desc(ideal.mdl.boot)) %>%
    filter(!is.na(ideal.mdl.boot)) %>%
    mutate(yougov_name = as_factor(yougov_name)) %>%
    ggplot(aes(
      y = yougov_name,
      x = ideal.mdl.boot,
      xmin = ideal.mdl.boot.ci.95.lwr,
      xmax = ideal.mdl.boot.ci.95.upr
    )) +
    geom_vline(xintercept = 0) +
    geom_pointrange(alpha = 0.5) +
    theme_bw()

  ## does non-parametric estimates' standard error correlate with extremity?
  d.brands %>%
    arrange(desc(ideal.main)) %>%
    filter(!is.na(ideal.main)) %>%
    mutate(yougov_name = as_factor(yougov_name)) %>%
    ggplot(aes(
      x = abs(ideal.main),
      y = ideal.main.ci.95.upr - ideal.main.ci.95.lwr
    )) +
    geom_point() +
    geom_smooth(method = "lm", color = "purple") +
    geom_smooth(method = "loess", color = "orange") +
    theme_bw()

  ## does parametric estimates' standard error correlate with extremity?
  ## NOTE(review): this plot sorts and filters on `ideal.main` although it
  ## plots `ideal.mdl.boot` -- confirm whether that is deliberate.
  d.brands %>%
    arrange(desc(ideal.main)) %>%
    filter(!is.na(ideal.main)) %>%
    mutate(yougov_name = as_factor(yougov_name)) %>%
    ggplot(aes(
      x = abs(ideal.mdl.boot),
      y = ideal.mdl.boot.ci.95.upr - ideal.mdl.boot.ci.95.lwr
    )) +
    geom_point() +
    geom_smooth(method = "lm", color = "purple") +
    geom_smooth(method = "loess", color = "orange") +
    theme_bw()
}

## Distinct ideal-point variant labels: take every column name containing
## "ideal" and delete all occurrences of "ideal.", ".se", ".n" and
## ".intercept" from it, then de-duplicate.
idealpt.vars <- unique(
  gsub(
    "(ideal\\.|\\.se|\\.n|\\.intercept)",
    "",
    grep("ideal", colnames(d.brands), value = TRUE)
  )
)

### Merge firm activity data ----
message("Merging Activity Data:")

#### CDP data ----
## Turn the CDP score into a factor whose levels are the observed scores in
## descending sort order, so the integer code of each level can be averaged.
cdp_brand_climate_scores <- readRDS("data_interim/cdp_brand_climate_scores.rds")
cdp_score_grades <- sort(unique(cdp_brand_climate_scores$cdp_score), decreasing = TRUE)
cdp_brand_climate_scores$cdp_score <- factor(
  cdp_brand_climate_scores$cdp_score,
  levels = cdp_score_grades
)

## Mean level code per brand.
cdp_brand_climate_avg_scores <- cdp_brand_climate_scores %>%
  group_by(yougov_name) %>%
  summarise(cdp_avg_score = mean(as.numeric(cdp_score)), .groups = "drop")

## Replace any existing `cdp*` columns with the fresh averages.
d.brands <- d.brands %>%
  select(-starts_with("cdp")) %>%
  left_join(cdp_brand_climate_avg_scores, by = "yougov_name")

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

#### Climate Action 100+ data ----
## Raw Climate Action 100+ tables. Only the disclosure and policy tables are
## merged in the code that follows.
climateaction_100_brand_accounting <- readRDS("data_interim/climateaction_100_brand_accounting.rds")
climateaction_100_brand_auto <- readRDS("data_interim/climateaction_100_brand_auto.rds")
climateaction_100_brand_aviat <- readRDS("data_interim/climateaction_100_brand_aviat.rds")
climateaction_100_brand_electric <- readRDS("data_interim/climateaction_100_brand_electric.rds")
climateaction_100_brand_oil <- readRDS("data_interim/climateaction_100_brand_oil.rds")
climateaction_100_brand_policy <- readRDS("data_interim/climateaction_100_brand_policy.rds")

climateaction_100_brand_disclosures <- readRDS("data_interim/climateaction_100_brand_disclosures.rds")

## Disclosures: drop "Progress" periods, unmatched brands and the
## "Bayerische Motoren Werke" company rows, then spread to one row per brand
## with columns named `clm100_discl.<subindic>.<metric>.<period>`.
climateaction_100_brand_disclosures.wide <- climateaction_100_brand_disclosures %>%
  filter(
    !grepl("Progress", period),
    !is.na(yougov_name),
    !grepl("Bayerische Motoren Werke", clmaxn_company)
  ) %>%
  mutate(nm = "clm100_discl") %>%
  select(yougov_name, nm, subindic, metric, period, value) %>%
  pivot_wider(
    names_from = c(nm, subindic, metric, period),
    values_from = value,
    names_sep = "."
  )

d.brands <- d.brands %>%
  select(-starts_with("clm100_discl")) %>%
  left_join(climateaction_100_brand_disclosures.wide, by = "yougov_name")

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

## Policy indicators: same row filters (minus the period filter), spread to
## columns named `clm100_policy.<indic>.<subindic>`.
climateaction_100_brand_policy.wide <- climateaction_100_brand_policy %>%
  filter(
    !is.na(yougov_name),
    !grepl("Bayerische Motoren Werke", clmaxn_company)
  ) %>%
  mutate(nm = "clm100_policy") %>%
  select(yougov_name, nm, indic, subindic, value) %>%
  pivot_wider(
    names_from = c(nm, indic, subindic),
    values_from = value,
    names_sep = "."
  )

d.brands <- d.brands %>%
  select(-starts_with("clm100_policy")) %>%
  left_join(climateaction_100_brand_policy.wide, by = "yougov_name")

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

#### Good Jobs First violations ----
## Good Jobs First violations within the study years (`STUDY_YEARS` is defined
## outside this file section): strip "$" and "," from the penalty amount, then
## count offenses and total penalties per brand and primary offense.
gjf_co_violations <- readRDS("data_interim/goodjobsfirst_company_violations.rds") %>%
  filter(gjf_penalty_year %in% STUDY_YEARS) %>%
  mutate(gjf_penalty_amt = as.numeric(gsub("(\\$|\\,)", "", gjf_penalty_amt))) %>%
  group_by(yougov_name, gjf_primary_offense) %>%
  summarise(
    num_offenses = n(),
    totl_penalty_amt = sum(gjf_penalty_amt, na.rm = TRUE),
    .groups = "drop"
  )

## Offense types considered relevant (not referenced again in this section).
gjf_relv_offenses <- c(
  "agribusiness violation",
  "wage and hour violation",
  "workplace safety or health violation",
  "workplace whistleblower retaliation",
  "labor relations violation",
  "employment discrimination",
  "discriminatory practices (non-employment)",
  "consumer protection violation",
  "Family and Medical Leave Act",
  "environmental violation"
)

## One row per brand; columns `gjf.n_off.<offense>` and
## `gjf.totl_penalty_amt.<offense>`.
gjf_co_violations.wide <- gjf_co_violations %>%
  rename(gjf.n_off = num_offenses, gjf.totl_penalty_amt = totl_penalty_amt) %>%
  pivot_wider(
    names_from = gjf_primary_offense,
    values_from = c(gjf.n_off, gjf.totl_penalty_amt),
    names_sep = "."
  ) %>%
  ungroup()

d.brands <- d.brands %>%
  select(-starts_with("gjf")) %>%
  left_join(gjf_co_violations.wide, by = "yougov_name")

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

## Missing offense counts become 0, then counts are summed into labor,
## discrimination and environmental totals. The referenced offense columns
## exist only if those offense types occur in the filtered data.
d.brands <- d.brands %>%
  mutate_at(vars(starts_with("gjf.n_off")), ~ ifelse(is.na(.x), 0, .x)) %>%
  mutate(
    `gjf.n_off.labor` =
      `gjf.n_off.wage and hour violation` +
      `gjf.n_off.labor relations violation` +
      `gjf.n_off.workplace safety or health violation` +
      `gjf.n_off.employment discrimination` +
      `gjf.n_off.discriminatory practices (non-employment)`,
    `gjf.n_off.discr` =
      `gjf.n_off.employment discrimination` +
      `gjf.n_off.discriminatory practices (non-employment)`,
    `gjf.n_off.environ` =
      `gjf.n_off.environmental violation` +
      `gjf.n_off.energy conservation violation`
  )

#### HRC workplace ratings ----
## HRC ratings: per brand, the mean rating (ignoring NAs) and the first listed
## state. Existing `hrc_*` columns are replaced.
hrc_brand_equality_ratings <- readRDS("data_interim/hrc_brand_equality_ratings.rds")

d.brands <- d.brands %>%
  select(-starts_with("hrc_")) %>%
  left_join(
    hrc_brand_equality_ratings %>%
      group_by(yougov_name) %>%
      summarise(
        hrc_rating = mean(hrc_rating, na.rm = TRUE),
        hrc_state = first(hrc_state),
        .groups = "drop"
      ) %>%
      select(yougov_name, hrc_state, hrc_rating),
    by = "yougov_name"
  )

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

#### Glassdoor demographics ----
## Glassdoor ratings by demographic group: total reviews and mean rating per
## brand x type x group, spread to columns named
## `gd.<type>.<group>.<num_reviews|avg_rating>`.
glassdoor_demogs <- readRDS("data_interim/glassdoor_demogs.rds")

glassdoor_demogs.wide <- glassdoor_demogs %>%
  group_by(yougov_name, type, group) %>%
  summarise(
    num_reviews = sum(as.numeric(num_reviews), na.rm = TRUE),
    avg_rating = mean(as.numeric(avg_rating), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(nm = "gd") %>%
  filter(!is.na(yougov_name), !is.na(type), !is.na(group)) %>%
  select(yougov_name, nm, type, group, num_reviews, avg_rating) %>%
  pivot_wider(
    names_from = c(nm, type, group),
    values_from = c(num_reviews, avg_rating),
    names_glue = "{nm}.{type}.{group}.{.value}"
  )

## NOTE(review): unlike the other merges, existing `gd.*` columns are not
## dropped before this join -- confirm none can be present on `d.brands`.
d.brands <- d.brands %>%
  left_join(glassdoor_demogs.wide, by = "yougov_name")

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

### Merge HQ congress data ----
message("Merging HQ Congessional Representation Data:")

zipcode_house_dw <- readRDS("data_interim/zipcode_house_dw.rds")
zipcode_sen_dw   <- readRDS("data_interim/zipcode_sen_dw.rds")

## House: zero-pad the HQ zip to five characters and attach the mean
## first-dimension NOMINATE score for that zip over the study years.
d.brands <- d.brands %>%
  select(-starts_with("cong_house")) %>%
  mutate(
    hq_uszip = ifelse(is.na(hq_uszip), NA, sprintf("%05d", as.numeric(hq_uszip)))
  ) %>%
  left_join(
    zipcode_house_dw %>%
      filter(
        year %in% STUDY_YEARS,
        !grepl("failed to decode utf16", zip)
      ) %>%
      group_by(zip) %>%
      summarise(cong_house_dw_mean = mean(nominate_dim1), .groups = "drop"),
    by = c("hq_uszip" = "zip")
  )

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

## Senate: map full state names to abbreviations (anything not in
## `state.name` becomes NA), fall back to the HRC and Glassdoor state columns,
## then attach the state's mean first-dimension NOMINATE score computed over
## distinct year x senator rows.
d.brands <- d.brands %>%
  select(-starts_with("cong_sen")) %>%
  mutate(
    hq_usstate = state.abb[match(hq_usstate, state.name)],
    hq_usstate = coalesce(hq_usstate, hrc_state, glassdoor_hq_state)
  ) %>%
  left_join(
    zipcode_sen_dw %>%
      filter(
        year %in% STUDY_YEARS,
        !grepl("failed to decode utf16", zip),
        !is.na(bioname)
      ) %>%
      distinct(year, state_abbrev, bioname, nominate_dim1) %>%
      group_by(state_abbrev) %>%
      summarise(cong_sen_dw_mean = mean(nominate_dim1), .groups = "drop"),
    by = c("hq_usstate" = "state_abbrev")
  )

stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

### Merge all stakeholders together ----

## Stakeholder partisanship flags.
##  * hq_pres.REP : per-brand average across elections at each geography
##                  (1 - Democratic share for state and zip; the Republican
##                  county columns as-is), keeping the first non-missing of
##                  zip, state, county.
##  * loc.REP     : SafeGraph location estimate, falling back to Zippia.
##  * stkhl.*     : logical flags built by thresholding each stakeholder
##                  measure at 0 or at 0.5.
## NOTE(review): `rowwise()` is never undone here, so `d.brands` remains a
## row-wise frame after this step -- confirm downstream code expects that.
d.brands <- d.brands %>%
  ### sanitize data
  rowwise() %>%
  mutate(
    hq_pres.REP.stfips = mean(
      c(1 - hq_Pres.DEM.08.stfips, 1 - hq_Pres.DEM.12.stfips, 1 - hq_Pres.DEM.16.stfips),
      na.rm = TRUE
    ),
    hq_pres.REP.zip = mean(
      c(1 - hq_Pres.DEM.08.zipcode, 1 - hq_Pres.DEM.12.zipcode, 1 - hq_Pres.DEM.16.zipcode),
      na.rm = TRUE
    ),
    hq_pres.REP.county = mean(
      c(hq_Pres.REP.12.county, hq_Pres.REP.16.county, hq_Pres.REP.20.county),
      na.rm = TRUE
    ),
    hq_pres.REP = coalesce(hq_pres.REP.zip, hq_pres.REP.stfips, hq_pres.REP.county),
    loc.REP = coalesce(sg.pres.REP, zi.pres.REP)
  ) %>%
  select(-starts_with("stkhl")) %>%
  mutate(
    stkhl.hq.cong_sen.R = cong_sen_dw_mean > 0,
    stkhl.hq.cong_house.R = cong_house_dw_mean > 0,
    stkhl.hq.voters.R = hq_pres.REP > 0.5,
    stkhl.loc.R = loc.REP > 0.5,
    stkhl.followers.R = twitter.foll_ideo_slant > 0,
    stkhl.affil.exec.R = FEC.donors_R.Top_Exec > 0.5,
    stkhl.affil.rank_and_file.R = FEC.donors_R.Rank_and_File > 0.5,
    stkhl.affil.pr.R = FEC.donors_R.Public_Relations > 0.5,
    stkhl.affil.board.R = FEC.donors_R.Board_Member > 0.5
  )

## Share of available (non-missing) stakeholder flags that are TRUE.
d.brands$stkhl.R <- rowMeans(
  as.data.frame(select_at(d.brands, vars(starts_with("stkhl")))),
  na.rm = TRUE
)

### Merge counts of relevant dictionaries ----
message("Merging custom dictionaries:")

## Interactive inspection: total count per partisan feature (with its chi2),
## most frequent first. The result is not stored.
as.data.frame(
  scaled_text$dfm_brand_partisan_df_long %>%
    group_by(feature, chi2) %>%
    summarise(count = sum(count), .groups = "drop") %>%
    arrange(desc(count))
)

#### broad aggregated criteria ----
## Broad topic dictionary: topic label -> regular expression that is matched
## (via grepl) against the `feature` column of the partisan feature table.
## The patterns look like stemmed tokens joined by underscores -- TODO confirm
## against how the features were built. Patterns overlap across topics (e.g.
## "women" and "hate_crim" each appear under two labels), so one feature can
## count toward several topics.
kwd_broad_rgx <- list(
  "Small Business"="small\\_busi",
  "Agriculture/Rural Communities"="(agricultur|ag\\_|rural)",
  "Reproductive Rights"="(birth\\_control|abort|reproduct|right\\_choos|woman|women|roe\\_|wade)",
  "Voting Rights"="(vote|voic\\_heard|democraci)",
  "Black Community"="(black|african\\_american|histori\\_mont)",
  "Asian/AAPI Community"="(aapi|asian)",
  "Hispanic/Latino Community"="(hispanic|latin)",
  "Women"="women",
  "LGBTQ"="(lgbtq|marriag\\_equal|hate\\_crim|pride\\_month)",
  "Racial Justice"="(floyd|racial|justic|white\\_suprem|hate\\_crim|right\\_movem|civil|communiti\\_color|racism)",
  "Indigenous/Native Community"="(indigen|nativ)",
  "Immigrants"="(immigr|asylum)",
  "Police Brutality"="polic\\_brutal",
  "Healthcare"="(expand\\_access|health|care|matern|prescript|drug)",
  "Gun Violence"="(shoot|gun|background\\_check)",
  ## NOTE(review): "weak_mask" may be a typo for "wear_mask" -- confirm
  "COVID-19 Measures"="(weak\\_mask|social\\_dist|covid|vaccin\\_sit)",
  "Climate/Environment"="(electr_vehicl|clean_air|climat|fossil_fuel|energi|environment|air_qualiti|save_planet|pollut)",
  "Labor"="(worker|job|benefit|equal|pay|wage)",
  "Family"="(famili|child)",
  "Armed Forces"="(arm\\_forc|brave\\_men)"
)

## For every broad topic, total the counts of matching features per brand and
## name the resulting column `kwd.<topic>`; rows are sorted by brand.
kwd_broad_rgx_counts <- pblapply(names(kwd_broad_rgx), function(topic) {
  rgx <- kwd_broad_rgx[[topic]]
  topic_col <- paste0("kwd.", topic)

  scaled_text$dfm_brand_partisan_df_long %>%
    filter(grepl(rgx, feature)) %>%
    group_by(yougov_name) %>%
    summarise(count = sum(count), .groups = "drop") %>%
    rename_at("count", ~ topic_col) %>%
    arrange(yougov_name)
})

## Combine the per-topic tables into one wide table keyed by brand; a brand
## missing from a topic's table gets NA in that topic's column.
kwd_broad_rgx_counts_df <- kwd_broad_rgx_counts %>%
  purrr::reduce(full_join, by = "yougov_name")

#### specific individual phrases ----
## Two-level named list: category -> (phrase label -> regular expression).
## Patterns are matched (via grepl) against stemmed, underscore-joined
## features, so they must be written in stemmed form. When flattened with
## unlist(), names become "<category>.<phrase label>" and are used to build
## `kwd.<category>.<phrase label>` columns further below.
##
## NOTE(review): the label "seuxal harassment" is misspelled, and
## "electric vehicles" sits under "Healthcare". Both are left as-is because
## the labels become column names that other code may reference.
kwd_specif_rgx <- list(
  ### ++ more right-leaning
  "Small Business"=list(
    "small business"="small\\_busi"
  ),
  "Agriculture/Rural"=list(
    "agriculture"="agricultur",
    "rural"="rural"
  ),
  "Law Enforcement"=list(
    "law enforcement"="law\\_enforc"
  ),
  "Armed Forces"=list(
    "brave men"="brave\\_men",
    "armed forces"="arm\\_forc"
  ),
  ### ++ more left-leaning
  "LGBTQ"=list(
    "LGBTQ"="lgbtq",
    "marriage equality"="marriag\\_equal",
    "white supremacy"="white\\_supremac",
    "pride month"="pride\\_mont"
  ),
  "Voting"=list(
    "voting"="vote",
    "voter suppression"="voter\\_suppress",
    "voice heard"="voic\\_heard",
    "voter registration"="voter\\_registr"
  ),
  "Gender"=list(
    "women"="women",
    "birth control"="birth\\_control",
    "reproductive rights"="reproduct",
    "abortion"="abort",
    "seuxal harassment"="sexual_harass"
  ),
  "Racial"=list(
    "black"="(black|african\\_american)",
    "asian/AAPI"="aapi|asian",
    "latino"="latino",
    "indigenous"="indigen",
    "racial justice"="racial\\_justic",
    "racism"="racism",
    "george floyd"="floyd",
    "immigration"="immigr",
    "asylum seeker"="asylum\\_seeker",
    ## NOTE(review): matches any feature containing "right" -- confirm intended
    "civil rights"="(civil|right)",
    "underserved community"="underserv\\_commun",
    "stand in solidarity"="stand\\_solidar",
    "police brutality"="polic\\_brutal"
  ),
  "Healthcare"=list(
    ## fixed: stemming drops the trailing "e" ("healthcar"); the shorter
    ## prefix matches both the stemmed and the unstemmed spelling
    "healthcare"="healthcar",
    "health crisis"="health\\_cris",
    "mental health"="mental\\_health",
    ## fixed: the stemmed feature is "electr_vehicl" (as used in
    ## kwd_broad_rgx); accept both "electr_" and "electric_"
    "electric vehicles"="electr(ic)?\\_vehicl"
  ),
  "Gun Violence"=list(
    "guns"="gun"
  ),
  "Environment/Climate"=list(
    "environmental justice"="environment\\_justic",
    "save the planet"="save\\_planet",
    "climate change"="climat\\_chan",
    "climate crisis"="climat\\_cris",
    "protect the planet"="protect\\_planet"
  ),
  "COVID-19 Policy"=list(
    ## fixed: was "weak\\_mask", a typo that could never match "wear_mask"
    "wear a mask"="wear\\_mask",
    "social distance"="social\\_dist"
  ),
  "Labor"=list(
    "equal pay"="equal\\_pay",
    "worker"="worker",
    "minimum wage"="minimum\\_wage",
    "paid leave"="paid\\_leav",
    "benefits"="benefit"
  ),
  "Student Loans"=list(
    "student loans"="student\\_loan"
  )
)

## Flatten the nested dictionary into a named character vector whose names
## are "<category>.<phrase label>".
kwd_specif_rgx_flat <- unlist(kwd_specif_rgx)

## For every phrase: sum the counts of all matching features per brand and
## store them in a column named `kwd.<category>.<phrase label>`.
kwd_specif_rgx_counts <- pblapply(names(kwd_specif_rgx_flat), function(phrase) {
  pattern <- kwd_specif_rgx_flat[[phrase]]
  hits <- filter(scaled_text$dfm_brand_partisan_df_long, grepl(pattern, feature))
  per_brand <- summarise(group_by(hits, yougov_name),
                         count = sum(count), .groups="drop")
  per_brand <- rename_at(per_brand, "count", ~ paste0("kwd.", phrase))
  arrange(per_brand, yougov_name)
})
## One wide table: one row per brand, one column per phrase.
kwd_specif_rgx_counts_df <- purrr::reduce(
  kwd_specif_rgx_counts,
  full_join,
  by = "yougov_name"
)

## Replace any existing keyword columns on `d.brands` with fresh broad and
## phrase-level counts (dropping columns matching "kwd" first makes re-runs
## safe).
d.brands <- left_join(
  left_join(
    select(d.brands, -matches("kwd")),
    kwd_broad_rgx_counts_df,
    by = "yougov_name"
  ),
  kwd_specif_rgx_counts_df,
  by = "yougov_name"
)

## Guard: the data must not contain fully duplicated rows after the joins.
stopifnot(nrow(d.brands) == nrow(distinct(d.brands)))

## Combined count across the race/ethnicity-related broad topics. An NA in
## any component makes the sum NA.
d.brands <- d.brands %>%
  mutate(
    `kwd.Nonwhite` =
      `kwd.Black Community` +
      `kwd.Asian/AAPI Community` +
      `kwd.Hispanic/Latino Community` +
      `kwd.Racial Justice` +
      `kwd.Indigenous/Native Community`
  )

## Quick interactive checks on the "Racial Justice" counts.
mean(d.brands$`kwd.Racial Justice`, na.rm = TRUE)
mean(d.brands$`kwd.Racial Justice` > 0, na.rm = TRUE)
summary(d.brands$`kwd.Racial Justice`)

## 1.2. Unit: Text -------------------------------------------------------------

## Brand x feature table of partisan text: one row per
## (yougov_name, feature, chi2, color) with the summed mention count, plus the
## manually curated `type` / `type_party` labels for each feature.
message("++ brand-level partisan text")
d.text.brands <- scaled_text$dfm_brand_partisan_df_long %>%
  ### orient
  ## as.vector() strips any attributes from chi2 before it is passed on
  mutate(chi2 = as.vector(chi2)) %>%
  ## project helper from util.R (presumably fixes the sign of chi2 -- verify)
  orient_idealpts_text(ideal_col = "chi2", scale = F) %>%
  ## non-negative chi2 is coloured "red", negative "blue"
  mutate(color = ifelse(chi2 >= 0, "red", "blue")) %>%
  ### summarise
  group_by(yougov_name, feature, chi2, color) %>%
  summarise(n_mentions=sum(count), 
            .groups = "drop") %>%
  arrange(chi2) %>%
  ### categorize
  ## the curated file stores features with spaces; convert to the
  ## underscore-joined form used here before joining
  left_join(partisan_bigrams_bytype %>% ## update this as needed
              mutate(feature = gsub(" ","_",feature)) %>%
              select(type, feature, type_party=party),
            by = "feature"
  )

message("++ industry-level partisan text")
## Aggregate the brand-level table to industry x feature: number of distinct
## brands in the industry with at least one mention, and total mentions.
d.text.indus <- d.text.brands %>%
  left_join(Brands %>% distinct(yougov_name, yougov_brand_category),
            by = "yougov_name") %>%
  group_by(yougov_brand_category, type, type_party, feature, chi2, color) %>%
  summarise(
    ## computed first so it still sees the per-brand `n_mentions`
    n_indus_brands = n_distinct(yougov_name[n_mentions > 0]),
    n_mentions = sum(n_mentions),
    .groups = "drop"
  )

message("++ overall partisan text")
## Same aggregation across all brands, without the industry dimension.
d.text <- d.text.brands %>%
  group_by(type, type_party, feature, chi2, color) %>%
  summarise(
    ## computed first so it still sees the per-brand `n_mentions`
    n_brands = n_distinct(yougov_name[n_mentions > 0]),
    n_mentions = sum(n_mentions),
    .groups = "drop"
  )

## 1.3. Unit: Brands x Time ----------------------------------------------------

## Add a day-level document variable: the first 10 characters of `timestamp`
## (assumed to be the "YYYY-MM-DD" part -- TODO confirm) parsed as a Date.
docvars(Brand_Msgs_Q$dfm, field="timestamp_ymd") <- 
  as.Date(substr(docvars(Brand_Msgs_Q$dfm, field="timestamp"), 1, 10))

### unit: partisan phrase p x date t ----
## Long table with one row per (date, partisan phrase): the phrase count on
## that date, its curated type, and its keyness statistics. The `%T>% { cat() }`
## tees only print a progress marker and pass the data through unchanged.
d.partisan.t <- Brand_Msgs_Q$dfm                                     %T>% { cat("\r[1/10]") } %>%
  #### subset to period
  dfm_subset(between(timestamp_ymd, STUDY_START_YMD, STUDY_END_YMD)) %T>% { cat("\r[2/10]") } %>%
  #### group original dfm by date
  dfm_group(groups=timestamp_ymd)                                    %T>% { cat("\r[3/10]") } %>%
  #### extract selected partisan phrases
  dfm_select(pattern=gsub(" ","_", 
                          scaled_text$partisan_bigrams$feature))     %T>% { cat("\r[4/10]") } %>%
  convert(to="data.frame")                                           %T>% { cat("\r[5/10]") } %>%
  ## wide -> long: one row per (doc_id, feature)
  gather(key="feature", value="count", -doc_id)                      %T>% { cat("\r[6/10]") } %>%
  #### id by YMD date
  ## after grouping by date the document id is the date itself
  rename(timestamp_ymd = doc_id)                                     %T>% { cat("\r[7/10]") } %>%
  ## back to space-separated features to match the curated type file
  mutate(feature = gsub("_"," ", feature))                           %T>% { cat("\r[8/10]") } %>%
  #### merge with type (e.g. issue, event)
  left_join(partisan_bigrams_bytype, by = "feature")                 %T>% { cat("\r[9/10]") } %>%
  #### merge with partisan keyness
  left_join(scaled_text$partisan_bigrams %>%
              as.data.frame() %>%
              mutate(feature = gsub("_"," ",feature)) %>%
              select(feature, chi2, n_target, n_reference), 
            by = "feature")                                          %T>% { cat("\r[10/10]") }
cat("\n")

## Week label "<week-of-year>-<2-digit year>" (strftime %W-%y).
d.partisan.t$timestamp_week <- 
  format(as.Date(d.partisan.t$timestamp_ymd), format="%W-%y")

### unit: date t ----
## Number of brand posts per calendar date within the study window.
d.t <- Brand_Msgs %>%
  mutate(date=as.Date(timestamp)) %>%
  filter(between(date, STUDY_START_YMD, STUDY_END_YMD)) %>%  
  group_by(date) %>%
  summarise(n_brand_posts = n(),
            .groups = "drop")

### unit: site s x date t ----
## Same count, split by platform.
d.site.t <- Brand_Msgs %>%
  mutate(date=as.Date(timestamp)) %>%
  filter(between(date, STUDY_START_YMD, STUDY_END_YMD)) %>%
  group_by(platform, date) %>%
  summarise(n_brand_posts = n(),
            .groups = "drop")

### unit: week w ----
## Weekly totals of partisan-phrase mentions and the count-weighted mean chi2.
## NOTE(review): `timestamp_ymd` comes from a renamed doc_id column; confirm it
## compares as intended against STUDY_START_YMD / STUDY_END_YMD in between().
d.wk <- d.partisan.t %>%
  filter(between(timestamp_ymd, STUDY_START_YMD, STUDY_END_YMD)) %>%  
  mutate(timestamp_wk = format(as.Date(timestamp_ymd), "%W-%y")) %>%
  mutate(year = substr(timestamp_ymd, 1, 4)) %>%
  group_by(year, timestamp_wk) %>%
  ## `date` is the first timestamp encountered in the week (row-order dependent)
  summarise(date=as.Date(first(timestamp_ymd)),
            n_brand_pty_mentions=sum(count),
            chi2=weighted.mean(chi2, count),
            .groups="drop")

## Weekly partisan-phrase counts by party x type, expressed as a fraction of
## all brand posts in the same week.
d.partisan.type.wk <- d.partisan.t %>%
  #### get weekly counts of partisan x type phrases
  filter(between(timestamp_ymd, STUDY_START_YMD, STUDY_END_YMD)) %>%  
  mutate(timestamp_wk = format(as.Date(timestamp_ymd), "%W-%y"),
         year = substr(timestamp_ymd, 1, 4)) %>%
  group_by(year, timestamp_wk, party, type) %>%
  summarise(date=as.Date(first(timestamp_ymd)),
            n_pty_posts=sum(count),
            .groups = "drop") %>%
  #### get weekly counts of all posts
  ## full_join keeps weeks that appear in only one of the two tables
  full_join(d.t %>%
              mutate(timestamp_wk = format(as.Date(date), "%W-%y"),
                     year = substr(date, 1, 4)) %>%
              group_by(year, timestamp_wk) %>%
              summarise(n_brand_posts = sum(n_brand_posts),
                        .groups = "drop"),
            by = c("year", "timestamp_wk")) %>%
  #### compute %
  group_by(year, timestamp_wk, date, party, type) %>%
  summarise(n_posts = sum(n_brand_posts),
            pct_pty_posts = n_pty_posts/n_posts,
            .groups="drop") %>%
  mutate(type = str_to_title(type))

  #### get weekly counts of partisan x type phrases,
  #### but measured as % of brands
  
## One row per (date, phrase) with the number of distinct brands that used the
## phrase on that date. NOTE(review): despite the `.wk` suffix, the rows built
## here are keyed by day (`timestamp_ymd`); no weekly aggregation happens in
## this statement.
d.partisan.type.brand.wk <- Brand_Msgs_Q$dfm                         %T>% { cat("\r[1/13]") } %>%
  #### group original dfm by date
  ## document id becomes "<timestamp_ymd> <yougov_name>"
  dfm_group(groups=paste(docvars(Brand_Msgs_Q$dfm)$timestamp_ymd, 
                         docvars(Brand_Msgs_Q$dfm)$yougov_name))     %T>% { cat("\r[2/13]") } %>%  
  #### subset to period
  dfm_subset(between(timestamp_ymd, STUDY_START_YMD, STUDY_END_YMD)) %T>% { cat("\r[3/13]") } %>%
  #### extract selected partisan phrases
  dfm_select(pattern=gsub(" ","_", 
                          scaled_text$partisan_bigrams$feature))     %T>% { cat("\r[4/13]") } %>%
  convert(to="data.frame")                                           %T>% { cat("\r[5/13]") } %>%
  gather(key="feature", value="count", -doc_id)                      %T>% { cat("\r[6/13]") } %>%
  ## keep only brand-days on which the phrase was actually used
  filter(count != 0)                                                 %T>% { cat("\r[7/13]") } %>%
  #### id by YMD date
  ## first 10 characters of doc_id are the date ...
  mutate(timestamp_ymd = substr(doc_id, 1, 10))                      %T>% { cat("\r[8/13]") } %>%
  ## ... and everything after the 11th character (date + space) is the brand
  mutate(yougov_name = gsub("^.{11}", "", doc_id))                   %T>% { cat("\r[9/13]") } %>%
  mutate(feature = gsub("_"," ", feature))                           %T>% { cat("\r[10/13]") } %>%
  group_by(timestamp_ymd, feature)                                   %T>% { cat("\r[11/13]") } %>%
  summarise(count_brands = length(unique(yougov_name)),
            .groups = "drop")                                        %T>% { cat("\r[12/13]") } %>%
  #### merge with type (e.g. issue, event)
  left_join(partisan_bigrams_bytype, by = "feature")                 %T>% { cat("\r[13/13]") } %>%
  #### merge with partisan keyness
  left_join(scaled_text$partisan_bigrams %>%
              as.data.frame() %>%
              mutate(feature = gsub("_"," ",feature)) %>%
              select(feature, chi2, n_target, n_reference), 
            by = "feature")

### unit: brand b x year y  ----

## Skeleton: every brand crossed with every study year. The pooled `ideal*`
## columns are dropped because year-specific estimates are attached below.
d.brands.y <- d.brands %>%
  select(-starts_with("ideal")) %>%
  crossing(data.frame(year=STUDY_YEARS))

## Stack the single-year text scalings stored in
## `scaled_text_alt[["year_<y>_only"]]`, one year at a time.
d.brands.y.next <- data.frame()
for (y in STUDY_YEARS) {
  d.brands.y.next <- d.brands.y.next %T>%
    { message(sprintf("++ %i only", y)) } %>%
    bind_rows(scaled_text_alt[[sprintf("year_%i_only",y)]]$brands_bigrams_nonpara %>%
                orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
                mutate(year=y) %>%
                ## normal-approximation 95% interval around the ideal point
                mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
                ## rename in two passes: "idealbin*" -> "ideal.bin*", then every
                ## "ideal" -> "ideal.main" (so "ideal.bin*" becomes "ideal.main.bin*")
                rename_at(vars(-matches("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
                rename_at(vars(-matches("yougov_name|year")), ~gsub("ideal","ideal.main",.x)))
}
## Attach the yearly ideal points to the brand x year skeleton.
d.brands.y <- d.brands.y %>%
  left_join(d.brands.y.next, by = c("yougov_name","year"))

## Attach yearly covariates. Each step first drops any columns with the same
## prefix (so the chain can be re-run on an already-merged table) and then
## left-joins the yearly source by brand and year.
d.brands.y <- d.brands.y %T>%
  { message("++ Yearly: Firm lobbying") } %>%
  select(-starts_with("legis")) %>%
  left_join(scaled_legis$brands_yearly_nonpara %>% 
              select(yougov_name, year, legis.D_frac=D_frac, legis.n = n_legis, legis.dwnom1_mean = Nominate_dim1_mean, legis.dwnom1_sd = Nominate_dim1_sd),
            by = c("yougov_name","year")) %T>%

  { message("++ Yearly: Employee Contributions (FEC)") } %>%
  select(-starts_with("FEC")) %>%
  left_join(scaled_contrib_FEC$brands_yearly_nonpara %>%
              rename(year = TRANSACTION_YR) %>%
              ## prefix every non-key column with "FEC."
              rename_at(vars(-matches("yougov_name|year")), ~paste0("FEC.", .x)),
            by = c("yougov_name","year")) %T>%
  
  { message("++ Yearly: Employee Contributions (OpenSecrets)") } %>%
  select(-starts_with("opsec")) %>%
  left_join(scaled_contrib_opsec$brands_yearly_nonpara %>%
              rename_at(vars(-matches("yougov_name|year")), ~paste0("opsec.", .x)),
            by = c("yougov_name","year")) %T>%
  
  { message("++ Yearly: Employee Contributions (Stuckatz 2021)") } %>%
  select(-starts_with("stkz21")) %>%
  left_join(scaled_contrib_stuckatz2021$brands_yearly_nonpara %>%
              rename_at(vars(-matches("yougov_name|year")), ~paste0("stkz21.", .x)),
            by = c("yougov_name","year")) %T>%
  
  { message("++ Yearly: Twitter Followers (social-listening.org)") } %>%
  ## NOTE(review): starts_with("sl") drops every column whose name begins with
  ## "sl", not only the "sl." follower columns -- confirm no other column is hit.
  select(-starts_with("sl")) %>%
  left_join(sl_brands_followers %>%
              crossing(data.frame(year=STUDY_YEARS)) %>%
              ## NOTE(review): both branches test sl_Year_Month == "2017-02", so
              ## the same snapshot is used for every year; confirm whether the
              ## second branch was meant to select a later month.
              filter((sl_Year_Month == "2017-02" & year <= 2019) | (sl_Year_Month == "2017-02" & year >= 2020)) %>%              
              rename_at(vars(starts_with("sl")), ~gsub("sl_","sl.",.x)) %>%
              rename_at(vars(starts_with("sl")), ~gsub("-","_",.x)) %>%
              select(yougov_name, year, starts_with("sl")),
            by = c("yougov_name","year"))

## Harmonised Republican donation shares for the yearly table.
d.brands.y <- d.brands.y %>%
  select(-starts_with("R_don_share")) %>%
  ## total Stuckatz (2021) donations = sum of the four donor groups
  ## (NA in any component makes the total NA)
  mutate(stkz21.don = stkz21.don_govt + stkz21.don_exec + stkz21.don_prmkt + stkz21.don_rnf) %>%
  ## overall R share = donation-weighted average of the four group shares
  mutate(stkz21.R_share = ((stkz21.don_govt*stkz21.R_share_govt) + (stkz21.don_exec*stkz21.R_share_exec) + (stkz21.don_prmkt*stkz21.R_share_prmkt) + (stkz21.don_rnf*stkz21.R_share_rnf))/stkz21.don) %>%
  ## coalesce() takes the first non-missing source, in the order listed
  ## (FEC first, then Stuckatz 2021, then OpenSecrets where given)
  mutate(`R_don_share`                   = coalesce(FEC.R_don_share, stkz21.R_share, opsec.R_share.indiv_dollars),
         `R_don_share.Board_Member`      = FEC.R_don_share.Board_Member,
         `R_don_share.Managers`          = FEC.R_don_share.Managers,
         `R_don_share.Legal`             = FEC.R_don_share.Legal,
         `R_don_share.Human_Resources`   = FEC.R_don_share.Human_Resources,
         `R_don_share.Top_Exec`          = coalesce(FEC.R_don_share.Top_Exec, stkz21.R_share_exec),
         `R_don_share.Public_Relations`  = coalesce(FEC.R_don_share.Public_Relations, stkz21.R_share_govt),
         `R_don_share.Marketing`         = coalesce(FEC.R_don_share.Marketing, stkz21.R_share_prmkt),
         `R_don_share.Rank_and_File`     = coalesce(FEC.R_don_share.Rank_and_File, stkz21.R_share_rnf))

## Complement of the Democratic fraction among lobbied legislators.
d.brands.y <- d.brands.y %>%
  mutate(legis.R_frac = 1-legis.D_frac)

### HQs
## Zip-code level DW-NOMINATE tables for House and Senate members.
zipcode_house_dw <- readRDS("data_interim/zipcode_house_dw.rds")
zipcode_sen_dw   <- readRDS("data_interim/zipcode_sen_dw.rds")

## Mean first-dimension NOMINATE score of the House members mapped to each
## brand's headquarters zip code, by year.
d.brands.y <- d.brands.y %>%
  select(-starts_with("cong_house")) %T>%
  
  { message("++ Yearly: HQ House Rep Ideology") } %>%
  ## normalise zip codes to zero-padded 5-character strings for the join
  mutate(hq_uszip = ifelse(is.na(hq_uszip), NA, sprintf("%05d", as.numeric(hq_uszip)))) %>%
  left_join(zipcode_house_dw %>%
              ## drop rows whose zip field holds a decoding-error message
              filter(!grepl("failed to decode utf16", zip)) %>%
              group_by(year, zip) %>%
              summarise(cong_house_dw_mean = mean(nominate_dim1),
                        .groups = "drop"),
            by = c("hq_uszip"="zip", "year")) %>%
  ## carry the last available value downward within each brand/zip
  ## (fill() defaults to direction "down", so this depends on row order)
  group_by(yougov_name, hq_uszip) %>%
  fill(cong_house_dw_mean) %>%
  ungroup()

## Mean first-dimension NOMINATE score of the senators of each brand's
## headquarters state, by year.
d.brands.y <- d.brands.y %>%
  select(-starts_with("cong_sen")) %T>%
  
  { message("++ Yearly: HQ Senate Rep Ideology") } %>%
  left_join(zipcode_sen_dw %>%
              filter(!grepl("failed to decode utf16", zip), !is.na(bioname)) %>%
              ## the source is at zip level; keep each senator once per year/state
              distinct(year, state_abbrev, bioname, nominate_dim1) %>%
              group_by(year, state_abbrev) %>%
              summarise(cong_sen_dw_mean = mean(nominate_dim1),
                        .groups = "drop"),
            by = c("hq_usstate"="state_abbrev", "year")) %>%
  group_by(hq_usstate) %>%
  ## complete() adds a row for any year in the full STUDY_YEARS range that a
  ## state group lacks; fill() then carries values downward within the state.
  ## NOTE(review): the fill is per state, not per brand -- it relies on the row
  ## order within each state group after complete(); confirm this is intended.
  complete(year = full_seq(STUDY_YEARS, 1)) %>%
  fill(cong_sen_dw_mean) %>%
  ungroup()

## Build binary "Republican-leaning" indicators for each stakeholder group
## (HQ legislators, HQ voters, store locations, followers, employee donors)
## and their row-wise average `stkhl.R`.
d.brands.y <- d.brands.y %T>%
  { message("++ Yearly: All Stakeholders") } %>%
  ### sanitize data
  ## rowwise() is needed only for the two per-row means below: average
  ## Republican share (1 - Democratic share) over the 2008/2012/2016
  ## presidential results; all-NA rows yield NaN
  rowwise() %>%
  mutate(hq_pres.REP.stfips = mean(c(1-hq_Pres.DEM.08.stfips, 1-hq_Pres.DEM.12.stfips, 1-hq_Pres.DEM.16.stfips), na.rm=TRUE)) %>%
  mutate(hq_pres.REP.zip = mean(c(1-hq_Pres.DEM.08.zipcode, 1-hq_Pres.DEM.12.zipcode, 1-hq_Pres.DEM.16.zipcode), na.rm=TRUE)) %>%
  ## fixed: drop the row-wise grouping again. Without this the table stayed a
  ## rowwise_df, so every later mutate()/summarise() on it ran one row at a
  ## time. The remaining steps are element-wise, so their values are unchanged.
  ungroup() %>%
  ## prefer the zip-level value, fall back to the state-level one
  mutate(hq_pres.REP = coalesce(hq_pres.REP.zip, hq_pres.REP.stfips)) %>%
  mutate(loc.REP = coalesce(sg.pres.REP, zi.pres.REP)) %>%
  select(-starts_with("stkhl")) %>%
  mutate(stkhl.hq.cong_sen.R = cong_sen_dw_mean > 0,
         stkhl.hq.cong_house.R = cong_house_dw_mean > 0,
         stkhl.hq.voters.R = hq_pres.REP > 0.5,
         stkhl.loc.R = loc.REP > 0.5,
         stkhl.followers.R = twitter.foll_ideo_slant > 0,
         stkhl.affil.exec.R = FEC.donors_R.Top_Exec > 0.5,
         stkhl.affil.rank_and_file.R = FEC.donors_R.Rank_and_File > 0.5,
         stkhl.affil.pr.R = FEC.donors_R.Public_Relations > 0.5,
         stkhl.affil.board.R = FEC.donors_R.Board_Member > 0.5)

## Share of the available (non-missing) stakeholder indicators that are TRUE.
d.brands.y$stkhl.R <- d.brands.y %>%
  select_at(vars(starts_with("stkhl"))) %>% as.data.frame() %>%
  rowMeans(., na.rm=TRUE)

## Good Jobs First violations: count and total penalty per brand x year x
## primary offense, spread to one column per offense and merged into the
## yearly table.
message("++ Yearly: GJF Violations")
gjf_co_violations <- readRDS("data_interim/goodjobsfirst_company_violations.rds")

gjf_co_violations.y <- gjf_co_violations %>%
  filter(gjf_penalty_year %in% STUDY_YEARS) %>%
  ## strip "$" and "," before converting the penalty amount to numeric
  mutate(gjf_penalty_amt = as.numeric(gsub("(\\$|\\,)", "", gjf_penalty_amt))) %>%
  group_by(yougov_name, year=gjf_penalty_year, gjf_primary_offense) %>%
  summarise(num_offenses = n(),
            totl_penalty_amt = sum(gjf_penalty_amt, na.rm=T),
            .groups = "drop")

## Offense categories of interest (not referenced in the statements shown here).
gjf_relv_offenses <- c(
  "agribusiness violation",
  "wage and hour violation",
  "workplace safety or health violation",
  "workplace whistleblower retaliation",
  "labor relations violation",
  "employment discrimination",
  "discriminatory practices (non-employment)",
  "consumer protection violation",
  "Family and Medical Leave Act",
  "environmental violation"
)

## Wide form: columns "gjf.n_off.<offense>" and "gjf.totl_penalty_amt.<offense>".
gjf_co_violations.wide.y <- gjf_co_violations.y %>%
  rename(gjf.n_off = num_offenses, gjf.totl_penalty_amt = totl_penalty_amt) %>%
  pivot_wider(names_from = gjf_primary_offense, values_from = c(gjf.n_off, gjf.totl_penalty_amt), names_sep=".") %>%
  ungroup()

d.brands.y <- d.brands.y %>%
  select(-starts_with("gjf")) %>%
  left_join(gjf_co_violations.wide.y, by = c("yougov_name","year"))

d.brands.y <- d.brands.y %>%
  ## no record means zero offenses (penalty-amount columns keep their NAs)
  mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
  ## aggregate offense counts; these reference specific offense columns, which
  ## must exist in the data after pivot_wider()
  mutate(`gjf.n_off.labor`   = `gjf.n_off.wage and hour violation`+`gjf.n_off.labor relations violation`+`gjf.n_off.workplace safety or health violation`+`gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`) %>%
  mutate(`gjf.n_off.discr`   = `gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`) %>%
  mutate(`gjf.n_off.environ` = `gjf.n_off.environmental violation`+`gjf.n_off.energy conservation violation`)

## CDP climate scores: convert the score labels to an ordered numeric scale
## and average per brand x year.
message("++ Yearly: CDP scores")
cdp_brand_climate_scores <- readRDS("data_interim/cdp_brand_climate_scores.rds")
## factor levels = unique score labels in reverse sort order, so the label
## that sorts first gets the highest integer code
cdp_score_grades <- rev(sort(unique(cdp_brand_climate_scores$cdp_score)))
cdp_brand_climate_scores$cdp_score <- factor(cdp_brand_climate_scores$cdp_score, levels = cdp_score_grades)

cdp_brand_climate_avg_scores.y <- cdp_brand_climate_scores %>%
  group_by(yougov_name, year=cdp_score_year) %>%
  ## as.numeric() on a factor returns the integer level codes
  summarise(cdp_avg_score = mean(as.numeric(cdp_score)), .groups = "drop")

d.brands.y <- d.brands.y %>%
  select(-starts_with("cdp")) %>%
  left_join(cdp_brand_climate_avg_scores.y, by = c("yougov_name", "year"))

message("++ Yearly: HRC scores")
hrc_brand_equality_ratings <- readRDS("data_interim/hrc_brand_equality_ratings.rds")

## Attach HRC rating/state by brand and year. Within each brand, gaps in the
## rating and state are filled downward from the preceding year.
d.brands.y <- left_join(
  select(d.brands.y, -starts_with("hrc_")),
  hrc_brand_equality_ratings %>%
    arrange(yougov_name, year) %>%
    group_by(yougov_name) %>%
    fill(hrc_rating, hrc_state) %>%
    select(yougov_name, year, hrc_state, hrc_rating),
  by = c("yougov_name","year")
)

### unit: brand b x [pre-2020, post-2020]  ----

## Skeleton: every brand crossed with the two periods. The pooled `ideal*`
## columns are dropped because period-specific estimates are attached below.
d.brands.pre.post <- d.brands %>%
  select(-starts_with("ideal")) %>%
  crossing(data.frame(period=c("2014-2019","2020-2022")))

## Guard: no fully duplicated rows.
stopifnot(nrow(d.brands.pre.post) == nrow(distinct(d.brands.pre.post)))

## Stack the period-specific text scalings.
## NOTE(review): the two list elements are addressed with different separators
## (`2014_2019` with an underscore, `2020-2022` with a hyphen). Confirm both
## names exist in `scaled_text_alt`; a name that does not exist yields NULL.
d.brands.pre.post.next <- data.frame()
d.brands.pre.post.next <- d.brands.pre.post.next %T>%
  { message("++ Pre-2020") } %>%
  bind_rows(scaled_text_alt$`2014_2019`$brands_bigrams_nonpara %>%
              orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
              mutate(period="2014-2019") %>%
              ## normal-approximation 95% interval around the ideal point
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              ## "idealbin*" -> "ideal.bin*", then every "ideal" -> "ideal.main"
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.main",.x)))

d.brands.pre.post.next <- d.brands.pre.post.next %T>%
  { message("++ Post-2020") } %>%
  bind_rows(scaled_text_alt$`2020-2022`$brands_bigrams_nonpara %>%
              orient_idealpts_brands(ideal_col = "ideal", scale = F) %>%
              mutate(period="2020-2022") %>%
              mutate(ideal.ci.95.lwr=ideal-1.96*ideal.se, ideal.ci.95.upr=ideal+1.96*ideal.se) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("idealbin","ideal.bin",.x)) %>%
              rename_at(vars(-starts_with("yougov_name")), ~gsub("ideal","ideal.main",.x)))

## Attach the period ideal points to the brand x period skeleton.
d.brands.pre.post <- d.brands.pre.post %>%
  left_join(d.brands.pre.post.next, by = c("yougov_name","period"))

## Attach period-level covariates. Each yearly source is mapped to a period
## ("2014-2019" / "2020-2022"), averaged within brand x period, and left-joined
## after dropping any columns that carry the same prefix.
d.brands.pre.post <- d.brands.pre.post %T>%
  { message("++ Pre/Post:Firm lobbying") } %>%
  select(-starts_with("legis")) %>%
  left_join(scaled_legis$brands_yearly_nonpara %>% 
              ## years outside both ranges get period NA (no TRUE branch here)
              mutate(period = case_when(year %in% 2014:2019 ~ "2014-2019",
                                        year %in% 2020:2022 ~ "2020-2022")) %>%
              select(yougov_name, period, legis.D_frac=D_frac, legis.n = n_legis, legis.dwnom1_mean = Nominate_dim1_mean, legis.dwnom1_sd = Nominate_dim1_sd) %>%
              group_by(yougov_name, period) %>%
              ## unweighted means of the yearly values (no na.rm)
              summarise(legis.D_frac=mean(legis.D_frac), legis.n=mean(legis.n), legis.dwnom1_mean=mean(legis.dwnom1_mean), legis.dwnom1_sd=mean(legis.dwnom1_sd), .groups = "drop"),
            by = c("yougov_name","period")) %T>%
  
  { message("++ Pre/Post:Employee Contributions (FEC)") } %>%
  select(-starts_with("FEC")) %>%
  left_join(scaled_contrib_FEC$brands_yearly_nonpara %>%
              rename(year = TRANSACTION_YR) %>%
              rename_at(vars(-matches("yougov_name|year")), ~paste0("FEC.", .x)) %>%
              mutate(period = case_when(year %in% 2014:2019 ~ "2014-2019",
                                        year %in% 2020:2022 ~ "2020-2022",
                                        TRUE ~ NA_character_)) %>%              
              group_by(yougov_name, period) %>%
              ## NOTE(review): if `year` is numeric it is averaged as well and is
              ## carried into the join as an extra `year` column; the same
              ## applies to the two joins below, where it would collide and get
              ## .x/.y suffixes -- confirm nothing downstream depends on it.
              summarise_if(is.numeric, mean),
            by = c("yougov_name","period")) %T>%
  
  { message("++ Pre/Post:Employee Contributions (OpenSecrets)") } %>%
  select(-starts_with("opsec")) %>%
  left_join(scaled_contrib_opsec$brands_yearly_nonpara %>%
              rename_at(vars(-matches("yougov_name|year")), ~paste0("opsec.", .x)) %>%
              mutate(period = case_when(year %in% 2014:2019 ~ "2014-2019",
                                        year %in% 2020:2022 ~ "2020-2022",
                                        TRUE ~ NA_character_)) %>%
              group_by(yougov_name, period) %>%
              summarise_if(is.numeric, mean),
            by = c("yougov_name","period")) %T>%
  
  { message("++ Pre/Post:Employee Contributions (Stuckatz 2021)") } %>%
  select(-starts_with("stkz21")) %>%
  left_join(scaled_contrib_stuckatz2021$brands_yearly_nonpara %>%
              rename_at(vars(-matches("yougov_name|year")), ~paste0("stkz21.", .x)) %>%
              mutate(period = case_when(year %in% 2014:2019 ~ "2014-2019",
                                        year %in% 2020:2022 ~ "2020-2022",
                                        TRUE ~ NA_character_)) %>%
              group_by(yougov_name, period) %>%
              summarise_if(is.numeric, mean),
            by = c("yougov_name","period")) %T>%
  
  { message("++ Pre/Post:Twitter Followers (social-listening.org)") } %>%
  select(-starts_with("sl")) %>%
  left_join(sl_brands_followers %>%
              crossing(data.frame(period=c("2014-2019","2020-2022"))) %>%
              ## NOTE(review): both branches test sl_Year_Month == "2017-02", so
              ## both periods use the same snapshot; confirm whether the second
              ## branch was meant to select a later month.
              filter((sl_Year_Month == "2017-02" & period == "2014-2019") | (sl_Year_Month == "2017-02" & period == "2020-2022")) %>%              
              rename_at(vars(starts_with("sl")), ~gsub("sl_","sl.",.x)) %>%
              rename_at(vars(starts_with("sl")), ~gsub("-","_",.x)) %>%
              select(yougov_name, period, starts_with("sl")),
            by = c("yougov_name","period"))

## Harmonised Republican donation shares for the pre/post table.
d.brands.pre.post <- d.brands.pre.post %>%
  select(-starts_with("R_don_share")) %>%
  ## total Stuckatz (2021) donations = sum of the four donor groups
  ## (NA in any component makes the total NA)
  mutate(stkz21.don = stkz21.don_govt + stkz21.don_exec + stkz21.don_prmkt + stkz21.don_rnf) %>%
  ## overall R share = donation-weighted average of the four group shares
  mutate(stkz21.R_share = ((stkz21.don_govt*stkz21.R_share_govt) + (stkz21.don_exec*stkz21.R_share_exec) + (stkz21.don_prmkt*stkz21.R_share_prmkt) + (stkz21.don_rnf*stkz21.R_share_rnf))/stkz21.don) %>%
  ## coalesce() takes the first non-missing source, in the order listed
  mutate(`R_don_share`                   = coalesce(FEC.R_don_share, stkz21.R_share, opsec.R_share.indiv_dollars),
         `R_don_share.Board_Member`      = FEC.R_don_share.Board_Member,
         `R_don_share.Managers`          = FEC.R_don_share.Managers,
         `R_don_share.Legal`             = FEC.R_don_share.Legal,
         `R_don_share.Human_Resources`   = FEC.R_don_share.Human_Resources,
         `R_don_share.Top_Exec`          = coalesce(FEC.R_don_share.Top_Exec, stkz21.R_share_exec),
         `R_don_share.Public_Relations`  = coalesce(FEC.R_don_share.Public_Relations, stkz21.R_share_govt),
         `R_don_share.Marketing`         = coalesce(FEC.R_don_share.Marketing, stkz21.R_share_prmkt),
         `R_don_share.Rank_and_File`     = coalesce(FEC.R_don_share.Rank_and_File, stkz21.R_share_rnf))

## Complement of the Democratic fraction among lobbied legislators.
d.brands.pre.post <- d.brands.pre.post %>%
  mutate(legis.R_frac = 1-legis.D_frac)

### HQs
## Guard: no fully duplicated rows before the HQ joins.
stopifnot(nrow(d.brands.pre.post) == nrow(distinct(d.brands.pre.post)))

## Zip-code level DW-NOMINATE tables for House and Senate members.
zipcode_house_dw <- readRDS("data_interim/zipcode_house_dw.rds")
zipcode_sen_dw   <- readRDS("data_interim/zipcode_sen_dw.rds")

## Mean first-dimension NOMINATE score of the House members mapped to each
## brand's headquarters zip code, averaged within period.
d.brands.pre.post <- d.brands.pre.post %>%
  select(-starts_with("cong_house")) %T>%
  
  { message("++ Pre/Post:HQ House Rep Ideology") } %>%
  ## normalise zip codes to zero-padded 5-character strings for the join
  mutate(hq_uszip = ifelse(is.na(hq_uszip), NA, sprintf("%05d", as.numeric(hq_uszip)))) %>%
  left_join(zipcode_house_dw %>%
              ## drop rows whose zip field holds a decoding-error message
              filter(!grepl("failed to decode utf16", zip)) %>%
              mutate(period = case_when(year %in% 2014:2019 ~ "2014-2019",
                                        year %in% 2020:2022 ~ "2020-2022",
                                        TRUE ~ NA_character_)) %>%              
              group_by(period, zip) %>%
              summarise(cong_house_dw_mean = mean(nominate_dim1),
                        .groups = "drop"),
            by = c("hq_uszip"="zip", "period"))

## Mean first-dimension NOMINATE score of the senators of each brand's
## headquarters state, averaged within period.
d.brands.pre.post <- d.brands.pre.post %>%
  select(-starts_with("cong_sen")) %T>%
  
  { message("++ Pre/Post:HQ Senate Rep Ideology") } %>%
  left_join(zipcode_sen_dw %>%
              filter(!grepl("failed to decode utf16", zip), !is.na(bioname)) %>%
              ## the source is at zip level; keep each senator once per year/state
              distinct(year, state_abbrev, bioname, nominate_dim1) %>%
              mutate(period = case_when(year %in% 2014:2019 ~ "2014-2019",
                                        year %in% 2020:2022 ~ "2020-2022",
                                        TRUE ~ NA_character_)) %>%                
              group_by(period, state_abbrev) %>%
              summarise(cong_sen_dw_mean = mean(nominate_dim1),
                        .groups = "drop"),
            by = c("hq_usstate"="state_abbrev", "period"))

## Build binary "Republican-leaning" indicators for each stakeholder group
## (HQ legislators, HQ voters, store locations, followers, employee donors)
## and their row-wise average `stkhl.R`.
d.brands.pre.post <- d.brands.pre.post %T>%
  { message("++ All Stakeholders") } %>%
  ### sanitize data
  ## rowwise() is needed only for the two per-row means below: average
  ## Republican share (1 - Democratic share) over the 2008/2012/2016
  ## presidential results; all-NA rows yield NaN
  rowwise() %>%
  mutate(hq_pres.REP.stfips = mean(c(1-hq_Pres.DEM.08.stfips, 1-hq_Pres.DEM.12.stfips, 1-hq_Pres.DEM.16.stfips), na.rm=TRUE)) %>%
  mutate(hq_pres.REP.zip = mean(c(1-hq_Pres.DEM.08.zipcode, 1-hq_Pres.DEM.12.zipcode, 1-hq_Pres.DEM.16.zipcode), na.rm=TRUE)) %>%
  ## fixed: drop the row-wise grouping again. Without this the table stayed a
  ## rowwise_df, so every later mutate()/summarise() on it ran one row at a
  ## time. The remaining steps are element-wise, so their values are unchanged.
  ungroup() %>%
  ## prefer the zip-level value, fall back to the state-level one
  mutate(hq_pres.REP = coalesce(hq_pres.REP.zip, hq_pres.REP.stfips)) %>%
  mutate(loc.REP = coalesce(sg.pres.REP, zi.pres.REP)) %>%
  select(-starts_with("stkhl")) %>%
  mutate(stkhl.hq.cong_sen.R = cong_sen_dw_mean > 0,
         stkhl.hq.cong_house.R = cong_house_dw_mean > 0,
         stkhl.hq.voters.R = hq_pres.REP > 0.5,
         stkhl.loc.R = loc.REP > 0.5,
         stkhl.followers.R = twitter.foll_ideo_slant > 0,
         stkhl.affil.exec.R = FEC.donors_R.Top_Exec > 0.5,
         stkhl.affil.rank_and_file.R = FEC.donors_R.Rank_and_File > 0.5,
         stkhl.affil.pr.R = FEC.donors_R.Public_Relations > 0.5,
         stkhl.affil.board.R = FEC.donors_R.Board_Member > 0.5)

## Share of the available (non-missing) stakeholder indicators that are TRUE.
d.brands.pre.post$stkhl.R <- d.brands.pre.post %>%
  select_at(vars(starts_with("stkhl"))) %>% as.data.frame() %>%
  rowMeans(., na.rm=TRUE)

## Good Jobs First violations: count and total penalty per brand x period x
## primary offense, spread to one column per offense and merged into the
## pre/post table.
message("++ GJF Violations")
## Guard: no fully duplicated rows before the join.
stopifnot(nrow(d.brands.pre.post) == nrow(distinct(d.brands.pre.post)))

gjf_co_violations <- readRDS("data_interim/goodjobsfirst_company_violations.rds")

gjf_co_violations.pre.post <- gjf_co_violations %>%
  filter(gjf_penalty_year %in% STUDY_YEARS) %>%
  ## strip "$" and "," before converting the penalty amount to numeric
  mutate(gjf_penalty_amt = as.numeric(gsub("(\\$|\\,)", "", gjf_penalty_amt))) %>%
  mutate(period = case_when(gjf_penalty_year %in% 2014:2019 ~ "2014-2019",
                            gjf_penalty_year %in% 2020:2022 ~ "2020-2022",
                            TRUE ~ NA_character_)) %>%  
  group_by(yougov_name, period, gjf_primary_offense) %>%
  summarise(num_offenses = n(),
            totl_penalty_amt = sum(gjf_penalty_amt, na.rm=T),
            .groups = "drop")

## Offense categories of interest (not referenced in the statements shown here).
gjf_relv_offenses <- c(
  "agribusiness violation",
  "wage and hour violation",
  "workplace safety or health violation",
  "workplace whistleblower retaliation",
  "labor relations violation",
  "employment discrimination",
  "discriminatory practices (non-employment)",
  "consumer protection violation",
  "Family and Medical Leave Act",
  "environmental violation"
)

## Wide form: columns "gjf.n_off.<offense>" and "gjf.totl_penalty_amt.<offense>".
gjf_co_violations.wide.pre.post <- gjf_co_violations.pre.post %>%
  rename(gjf.n_off = num_offenses, gjf.totl_penalty_amt = totl_penalty_amt) %>%
  pivot_wider(names_from = gjf_primary_offense, values_from = c(gjf.n_off, gjf.totl_penalty_amt), names_sep=".") %>%
  ungroup()

d.brands.pre.post <- d.brands.pre.post %>%
  select(-starts_with("gjf")) %>%
  left_join(gjf_co_violations.wide.pre.post, by = c("yougov_name","period"))

d.brands.pre.post <- d.brands.pre.post %>%
  ## no record means zero offenses (penalty-amount columns keep their NAs)
  mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
  ## aggregate offense counts; these reference specific offense columns, which
  ## must exist in the data after pivot_wider()
  mutate(`gjf.n_off.labor`   = `gjf.n_off.wage and hour violation`+`gjf.n_off.labor relations violation`+`gjf.n_off.workplace safety or health violation`+`gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`) %>%
  mutate(`gjf.n_off.discr`   = `gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`) %>%
  mutate(`gjf.n_off.environ` = `gjf.n_off.environmental violation`+`gjf.n_off.energy conservation violation`)

## CDP climate scores: convert the score labels to an ordered numeric scale
## and average per brand x period.
message("++ CDP scores")
## Guard: no fully duplicated rows before the join.
stopifnot(nrow(d.brands.pre.post) == nrow(distinct(d.brands.pre.post)))

cdp_brand_climate_scores <- readRDS("data_interim/cdp_brand_climate_scores.rds")
## factor levels = unique score labels in reverse sort order, so the label
## that sorts first gets the highest integer code
cdp_score_grades <- rev(sort(unique(cdp_brand_climate_scores$cdp_score)))
cdp_brand_climate_scores$cdp_score <- factor(cdp_brand_climate_scores$cdp_score, levels = cdp_score_grades)

cdp_brand_climate_avg_scores.pre.post <- cdp_brand_climate_scores %>%
  ## years outside both ranges form an NA period, which never matches the join
  mutate(period = case_when(cdp_score_year %in% 2014:2019 ~ "2014-2019",
                            cdp_score_year %in% 2020:2022 ~ "2020-2022",
                            TRUE ~ NA_character_)) %>%  
  group_by(yougov_name, period) %>%
  ## as.numeric() on a factor returns the integer level codes
  summarise(cdp_avg_score = mean(as.numeric(cdp_score)), .groups = "drop")

d.brands.pre.post <- d.brands.pre.post %>%
  select(-starts_with("cdp")) %>%
  left_join(cdp_brand_climate_avg_scores.pre.post, by = c("yougov_name", "period"))

message("++ HRC scores")
## Guard: the brand/period table must not contain duplicated rows at this point.
stopifnot(nrow(d.brands.pre.post) == nrow(distinct(d.brands.pre.post)))

hrc_brand_equality_ratings <- readRDS("data_interim/hrc_brand_equality_ratings.rds")

## Attach HRC state and rating by brand and period. Within each brand, gaps in
## the rating/state are filled downward in year order before joining.
## NOTE(review): the right-hand table is not collapsed to one row per
## brand/period, so if the ratings file has several years per period this join
## will repeat brand/period rows -- confirm this is intended.
d.brands.pre.post <- left_join(
  select(d.brands.pre.post, -starts_with("hrc_")),
  hrc_brand_equality_ratings %>%
    mutate(
      period = case_when(
        year %in% 2014:2019 ~ "2014-2019",
        year %in% 2020:2022 ~ "2020-2022",
        TRUE ~ NA_character_
      )
    ) %>%
    arrange(yougov_name, year) %>%
    group_by(yougov_name) %>%
    fill(hrc_rating, hrc_state) %>%
    select(yougov_name, period, hrc_state, hrc_rating),
  by = c("yougov_name", "period")
)

## When sourcing the script, stop here once the data are assembled.
if (RUN_INTERACTIVE) {
  stop("Run the rest interactively")
}

# 2.) BASIC DESCRIPTIVES -----------------------------------------------------

## Write one LaTeX table row per brand that has a Twitter or Instagram account:
## index, brand, parent (the brand itself when no parent is recorded),
## recognition %, Twitter handle, Instagram account.
## Rows are APPENDED to brand_table.tex, so delete or truncate that file before
## re-running, otherwise the rows are duplicated.
d.tbl <- d.brands %>%
  filter(!is.na(tw_account) | !is.na(ig_account)) %>%
  mutate(parent = ifelse(is.na(parent), yougov_name, parent)) %>%
  select(b = yougov_name, y = yougov_pct_recognition, p = parent, tw = tw_handle, ig = ig_account) %>%
  distinct()

## Escape "&" for LaTeX.
fmt <- \(.) gsub("&", "\\&", ., fixed = TRUE)
## Typeset an account handle in \texttt (escaping "_"); missing handles,
## including the "@NA" produced by pasting "@" onto NA, become empty cells.
fmt_t <- \(.) ifelse(is.na(.) | . == "@NA", "", sprintf("\\texttt{%s}", gsub("_", "\\_", ., fixed = TRUE)))

## seq_len() rather than 1:nrow(): with an empty table 1:0 would iterate over
## c(1, 0) and write a bogus row.
for (i in seq_len(nrow(d.tbl))) {
  cat(
    sprintf(
      "\n%d & %s & %s & %s & %s & %s \\\\ \\hline",
      i,
      fmt(d.tbl$b[i]), fmt(d.tbl$p[i]), paste0(d.tbl$y[i], "\\%"),
      fmt_t(d.tbl$tw[i]), fmt_t(paste0("@", d.tbl$ig[i]))
    ),
    file = "brand_table.tex", append = TRUE
  )
}

## 2.0. Counts ----------------------------------------------------------------

## Headline sample sizes. The trailing numbers appear to record the values
## obtained in an earlier run.
nrow(d.brands) # 1000
sum(!(is.na(d.brands$tw_account) & is.na(d.brands$ig_account))) # 880
sum(!is.na(d.brands$tw_account)) # 803
sum(!is.na(d.brands$ig_account)) # 523
sum(d.brands$ideal.main.n > 0, na.rm = TRUE) # 645
sum(!is.na(d.brands$ideal.main), na.rm = TRUE) # 645
sum(d.brands$ideal.main.n > 5, na.rm = TRUE) # 450
sum(d.brands$ideal.main.n > 15, na.rm = TRUE) # 295

## Donation and lobbying coverage, for all brands and for brands with an account.
table(!is.na(d.brands$opsec.R.total_dollars)) # 666
table(
  !is.na(d.brands$opsec.R.total_dollars[!(is.na(d.brands$tw_account) & is.na(d.brands$ig_account))])
) # 634
table(d.brands$legis.n > 0) # 382
table(
  d.brands$legis.n[!(is.na(d.brands$tw_account) & is.na(d.brands$ig_account))] > 0
) # 356

## Mentions by phrase color, with each color's share of all mentions.
d.text %>%
  group_by(color) %>%
  summarise(n_mentions = sum(n_mentions), .groups = "drop") %>%
  mutate(share_mentions = n_mentions / sum(n_mentions))

## Mentions and phrase counts by phrase type. `pct_mentions` is relative to all
## mentions, `share_mentions` to mentions of phrases with a non-missing type.
d.text %>%
  # filter(!is.na(n_mentions)) %>%
  group_by(type) %>%
  summarise(
    n_mentions = sum(n_mentions),
    n_phrases = n(),
    .groups = "drop"
  ) %>%
  mutate(
    pct_mentions = n_mentions / sum(n_mentions),
    share_mentions = n_mentions / sum(n_mentions[!is.na(type)])
  ) %>%
  arrange(desc(pct_mentions))


### missingness in stakeholder data
## Unless noted, shares are computed among brands with a main ideal point.
mean(is.na(d.brands$FEC.D_don_share[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$twitter.foll_ideo_slant[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$sl.Dem_Lift.2022_10[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$hq_pres.REP[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$hq_pres.REP.county[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$sg.pres.REP[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$zi.pres.REP[!is.na(d.brands$ideal.main)]))
## share missing BOTH zi.pres.REP and sg.pres.REP
mean(
  is.na(d.brands$zi.pres.REP[!is.na(d.brands$ideal.main)]) &
    is.na(d.brands$sg.pres.REP[!is.na(d.brands$ideal.main)])
)
## computed over all brands
mean(is.na(d.brands$zippia_url))
mean(is.na(d.brands$yougov_pct_pos_opinion))

### missingness in agenda data
mean(is.na(d.brands$cdp_avg_score[!is.na(d.brands$ideal.main)]))
mean(is.na(d.brands$`clm100_discl.1.1 Metric assessment.a.Mar-21`[!is.na(d.brands$ideal.main)]))
## computed over all brands
mean(is.na(d.brands$opsec.cand.indiv_dollars))
mean(is.na(d.brands$`gjf.n_off.environmental violation`))
mean(is.na(d.brands$gd.Gender.Men.avg_rating))


## 2.1. Examples --------------------------------------------------------------

## Spot checks for individual brands: the brand record, the partisan phrases
## the brand used at least once, and posts matching a topical pattern.
glimpse(filter(d.brands, yougov_name == "Chevron"))

glimpse(filter(d.brands, yougov_name == "Whole Foods Market"))
filter(
  scaled_text$dfm_brand_partisan_df_long,
  yougov_name == "Whole Foods Market", count > 0
)
Brand_Msgs %>%
  filter(yougov_name == "Whole Foods Market") %>%
  filter(grepl("health.*care", text, ignore.case = TRUE)) %>%
  select(text)

glimpse(filter(d.brands, yougov_name == "Marriott"))
filter(
  scaled_text$dfm_brand_partisan_df_long,
  yougov_name == "Marriott", count > 0
)
Brand_Msgs %>%
  filter(yougov_name == "Marriott") %>%
  filter(grepl("climat.*chang", text, ignore.case = TRUE)) %>%
  select(text)


## 2.2. Outliers --------------------------------------------------------------

## most left-leaning brands (ten lowest `ideal`)
d.brands %>%
  arrange(ideal) %>%
  select(yougov_name, ideal) %>%
  head(10) %>%
  as.data.frame()

## text from the most left-leaning brands: per brand, the first nine phrases
## (used at least once) in ascending chi2 order
d.brands %>%
  arrange(ideal) %>%
  select(yougov_name, ideal) %>%
  head(10) %>%
  inner_join(
    filter(scaled_text$dfm_brand_partisan_df_long, count > 0),
    by = "yougov_name"
  ) %>%
  arrange(yougov_name, ideal, chi2) %>%
  group_by(yougov_name) %>%
  filter(row_number() < 10) %>%
  ungroup() %>%
  as.data.frame()

## most right-leaning brands (ten highest `ideal`)
d.brands %>%
  arrange(desc(ideal)) %>%
  select(yougov_name, ideal) %>%
  head(10) %>%
  as.data.frame()

## text from the most right-leaning brands: per brand, the first nine phrases
## (used at least once) in descending chi2 order
d.brands %>%
  arrange(desc(ideal)) %>%
  select(yougov_name, ideal) %>%
  head(10) %>%
  inner_join(
    filter(scaled_text$dfm_brand_partisan_df_long, count > 0),
    by = "yougov_name"
  ) %>%
  arrange(yougov_name, desc(ideal), desc(chi2)) %>%
  group_by(yougov_name) %>%
  filter(row_number() < 10) %>%
  ungroup() %>%
  as.data.frame()

## 2.3. Stakeholder misalignment ----------------------------------------------

## Throughout this section a brand "leans Democratic" online when
## ideal.main < 0, and on a share-type measure when the Republican share is
## below 0.5. Brands with a missing value on either side drop out of table().

table(d.brands$FEC.R_don_share.Top_Exec > d.brands$FEC.R_don_share.Rank_and_File,
      useNA = "always")

table(d.brands$FEC.R_don_share.Rank_and_File > d.brands$twitter.foll_ideo_slant,
      useNA = "always")

## % of brands where online aligns with all stakeholders
prop.table(table(
  (d.brands$ideal.main < 0) == (d.brands$stkhl.R < 0.5)
))
## % of brands where online aligns with PAC spending
prop.table(table(
  (d.brands$ideal.main < 0) == (d.brands$opsec.R_share.org_dollars < 0.5)
))
## % of brands where online aligns with all stakeholders AND PAC spending
prop.table(table(
  ((d.brands$ideal.main < 0) == (d.brands$stkhl.R < 0.5)) &
    ((d.brands$ideal.main < 0) == (d.brands$opsec.R_share.org_dollars < 0.5))
))
## % of brands where online aligns with all stakeholders AND candidate PAC spending
prop.table(table(
  ((d.brands$ideal.main < 0) == (d.brands$stkhl.R < 0.5)) &
    ((d.brands$ideal.main < 0) == (d.brands$opsec.R_cand_share.org_dollars < 0.5))
))
## % of brands where online aligns AND stakeholders AND candidate PAC spending all align
## (stkhl.R is cut at 0.5 here as everywhere else in this section; the previous
## cut-off of 0 was inconsistent with the other comparisons)
prop.table(table(
  ((d.brands$ideal.main < 0) == (d.brands$stkhl.R < 0.5)) &
    ((d.brands$ideal.main < 0) == (d.brands$opsec.R_cand_share.org_dollars < 0.5)) &
    ((d.brands$stkhl.R < 0.5) == (d.brands$opsec.R_cand_share.org_dollars < 0.5))
))

## 2.4. Posts over time --------------------------------------------------------

### study period

## Daily post counts restricted to the study window, pooled (top panel) and by
## platform (bottom panel), on a log10 y-axis with a smoother.
## NOTE(review): the fourth positional argument to between() is presumably
## data.table's `incbounds`, i.e. the bounds are excluded -- confirm.
p.t <- ggplot(
  filter(d.t, between(date, STUDY_START_YMD, STUDY_END_YMD, FALSE)),
  aes(x = date, y = n_brand_posts)
) +
  geom_line() +
  geom_smooth() +
  labs(x = "") +
  scale_y_continuous(name = "Number of posts\n(on date)", trans = "log10") +
  theme_bw() +
  theme_custom

p.site.t <- ggplot(
  filter(d.site.t, between(date, STUDY_START_YMD, STUDY_END_YMD, FALSE)),
  aes(x = date, y = n_brand_posts)
) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~ platform) +
  labs(x = "") +
  scale_y_continuous(name = "Number of posts\n(on date)", trans = "log10") +
  theme_bw() +
  theme_custom

p.t.combined <- cowplot::plot_grid(p.t, p.site.t, nrow = 2)

ggsave_v(
  p.t.combined,
  filename = "figures/overtime/brands_nposts_overtime.pdf",
  height = 5, width = 8
)

### overall (by posts)

## Daily post counts over the full message history, pooled and by platform.
## The dashed red line marks the start of the study period.
p.all.t <- Brand_Msgs %>%
  group_by(date = as.Date(timestamp)) %>%
  summarise(n_brand_posts = n(), .groups = "drop") %>%
  ggplot(aes(x = date, y = n_brand_posts)) +
  geom_line() +
  geom_vline(xintercept = as.Date(STUDY_START_YMD), linetype = 2, color = "red") +
  # geom_vline(xintercept = as.Date(STUDY_END_YMD), linetype = 2, color = "red") +
  geom_smooth() +
  labs(x = "") +
  scale_y_continuous(name = "Number of posts\n(on date)", trans = "log10") +
  theme_bw() +
  theme_custom

p.all.site.t <- Brand_Msgs %>%
  mutate(date = as.Date(timestamp)) %>%
  group_by(platform, date) %>%
  summarise(n_brand_posts = n(), .groups = "drop") %>%
  ggplot(aes(x = date, y = n_brand_posts)) +
  geom_line() +
  geom_vline(xintercept = as.Date(STUDY_START_YMD), linetype = 2, color = "red") +
  # geom_vline(xintercept = as.Date(STUDY_END_YMD), linetype = 2, color = "red") +
  geom_smooth() +
  facet_wrap(~ platform) +
  labs(x = "") +
  scale_y_continuous(name = "Number of posts", trans = "log10") +
  theme_bw() +
  theme_custom

p.all.t.combined <- cowplot::plot_grid(p.all.t, p.all.site.t, nrow = 2)

ggsave_v(
  p.all.t.combined,
  filename = "figures/overtime/brands_all_nposts_overtime.pdf",
  height = 5, width = 8
)

### overall (by brand)

## Number of distinct brands posting on each date, pooled and by platform.
## The dashed red line marks the start of the study period.
p.brands.all.t <- Brand_Msgs %>%
  group_by(date = as.Date(timestamp)) %>%
  summarise(n_brands = n_distinct(yougov_name), .groups = "drop") %>%
  ggplot(aes(x = date, y = n_brands)) +
  geom_line() +
  geom_vline(xintercept = as.Date(STUDY_START_YMD), linetype = 2, color = "red") +
  # geom_vline(xintercept = as.Date(STUDY_END_YMD), linetype = 2, color = "red") +
  geom_smooth() +
  labs(x = "") +
  scale_y_continuous(name = "Number of brands with any posts\n(on date)", trans = "log10") +
  theme_bw() +
  theme_custom

p.brands.all.site.t <- Brand_Msgs %>%
  mutate(date = as.Date(timestamp)) %>%
  group_by(platform, date) %>%
  summarise(n_brands = n_distinct(yougov_name), .groups = "drop") %>%
  ggplot(aes(x = date, y = n_brands)) +
  geom_line() +
  geom_vline(xintercept = as.Date(STUDY_START_YMD), linetype = 2, color = "red") +
  # geom_vline(xintercept = as.Date(STUDY_END_YMD), linetype = 2, color = "red") +
  geom_smooth() +
  facet_wrap(~ platform) +
  labs(x = "") +
  scale_y_continuous(name = "Number of brands with any posts\n(on date)", trans = "log10") +
  theme_bw() +
  theme_custom

## Pooled + by-platform brand counts (built here but not saved in this section).
p.brands.all.t.combined <- cowplot::plot_grid(
  p.brands.all.t,
  p.brands.all.site.t,
  nrow = 2
)

## Saved figure: pooled post counts above pooled brand counts.
p.posts_n_brands.all.t.combined <- cowplot::plot_grid(
  p.all.t,
  p.brands.all.t,
  nrow = 2
)

ggsave_v(
  p.posts_n_brands.all.t.combined,
  filename = "figures/overtime/brands_overtime.pdf",
  height = 5, width = 8
)

## 2.4. Correlation in ideal points --------------------------------------------

### Individual pairwise correlations
## Each comparison calls measure_alignment() on two ideal-point columns and
## saves its `$plot` element with axis titles describing the two estimates.

### full chi^2 vs. binarized chi^2
r.ideal.main.bin <- d.brands %>%
  measure_alignment(
    x.col = "ideal.main", y.col = "ideal.main.bin", lbl.col = "yougov_name",
    annot.quads = TRUE, align.condition = ideal.main.n > MIN_PARTISAN_BIGRAMS
  )
ggsave_v(
  r.ideal.main.bin$plot +
    scale_x_continuous(name = expression(NULL %<-% "More D.             Non-Parametric             More R." %->% NULL)) +
    scale_y_continuous(name = expression(NULL %<-% "More D.             Non-Parametric (Binarized)             More R." %->% NULL)),
  filename = "figures/idealpts/corr_main_bin.pdf",
  height = 5, width = 8
)

### non-param. vs. param.
r.ideal.main.mdl <- d.brands %>%
  measure_alignment(
    x.col = "ideal.main", y.col = "ideal.mdl", lbl.col = "yougov_name",
    annot.quads = TRUE
  )
ggsave_v(
  r.ideal.main.mdl$plot +
    scale_x_continuous(name = expression(NULL %<-% "More D.             Non-Parametric             More R." %->% NULL)) +
    scale_y_continuous(name = expression(NULL %<-% "More D.             Parametric             More R." %->% NULL)),
  filename = "figures/idealpts/corr_main_mdl.pdf",
  height = 5, width = 8
)

### twitter-only vs. instagram-only
r.ideal.tw.ig <- d.brands %>%
  measure_alignment(
    x.col = "ideal.tw", y.col = "ideal.ig", lbl.col = "yougov_name",
    annot.quads = TRUE
  )
ggsave_v(
  r.ideal.tw.ig$plot +
    scale_x_continuous(name = expression(NULL %<-% "More D.             Twitter Only             More R." %->% NULL)) +
    scale_y_continuous(name = expression(NULL %<-% "More D.             Instagram Only             More R." %->% NULL)),
  filename = "figures/idealpts/corr_tw_ig.pdf",
  height = 5, width = 8
)

### twitter-only vs. instagram-only (histogram)
hist.ideal.tw.ig <- d.brands %>%
  select(Twitter = ideal.tw, Instagram = ideal.ig) %>%
  gather(key = "site", value = "ideal") %>%
  histogram_plot(x.col = "ideal", group.col = "site")
ggsave_v(
  hist.ideal.tw.ig$plot,
  filename = "figures/idealpts/hist_tw_ig.pdf",
  height = 5, width = 8
)

### main vs. issues-only
r.ideal.main.issues <- d.brands %>%
  measure_alignment(
    x.col = "ideal.main", y.col = "ideal.issues", lbl.col = "yougov_name",
    annot.quads = TRUE
  )
ggsave_v(
  r.ideal.main.issues$plot +
    scale_x_continuous(name = expression(NULL %<-% "More D.             Non-Parametric             More R." %->% NULL)) +
    scale_y_continuous(name = expression(NULL %<-% "More D.             Non-Parametric (Issues)             More R." %->% NULL)),
  filename = "figures/idealpts/corr_main_issues.pdf",
  height = 5, width = 8
)

### main vs. groups-only
r.ideal.main.groups <- d.brands %>%
  measure_alignment(
    x.col = "ideal.main", y.col = "ideal.groups", lbl.col = "yougov_name",
    annot.quads = TRUE
  )
ggsave_v(
  r.ideal.main.groups$plot +
    scale_x_continuous(name = expression(NULL %<-% "More D.             Non-Parametric             More R." %->% NULL)) +
    scale_y_continuous(name = expression(NULL %<-% "More D.             Non-Parametric (Groups)             More R." %->% NULL)),
  filename = "figures/idealpts/corr_main_groups.pdf",
  height = 5, width = 8
)

## The axis scales are added to the `$plot` element at save time, as in the
## other pairwise comparisons of this section. Adding them directly to the
## value returned by measure_alignment() (from which `$plot` is extracted
## below) does not modify the plot stored inside it.

### issues-only vs. groups-only
r.ideal.issues.groups <- d.brands %>%
  measure_alignment(x.col="ideal.groups", y.col="ideal.issues", lbl.col="yougov_name",
                    annot.quads=TRUE)
ggsave_v(r.ideal.issues.groups$plot +
           scale_x_continuous(expression(NULL %<-% "More D.             Non-Parametric (Groups)             More R." %->% NULL)) +
           scale_y_continuous(expression(NULL %<-% "More D.             Non-Parametric (Issues)             More R." %->% NULL)),
         filename = "figures/idealpts/corr_issues_groups.pdf",
         height=5, width=8)

### main vs. stances-only
r.ideal.main.stances <- d.brands %>%
  measure_alignment(x.col="ideal.main", y.col="ideal.stances", lbl.col="yougov_name",
                    annot.quads=TRUE)
ggsave_v(r.ideal.main.stances$plot +
           scale_x_continuous(expression(NULL %<-% "More D.             Non-Parametric             More R." %->% NULL)) +
           scale_y_continuous(expression(NULL %<-% "More D.             Non-Parametric (Stances)             More R." %->% NULL)),
         filename = "figures/idealpts/corr_main_stances.pdf",
         height=5, width=8)

## Never executed when sourcing; run these lines by hand to preview the plots.
if (FALSE) {
  r.ideal.main.bin$plot
  r.ideal.main.mdl$plot
  hist.ideal.tw.ig$plot
  r.ideal.main.groups$plot
  r.ideal.issues.groups$plot
  r.ideal.main.stances$plot
}

### Grid of distributions

## Facet labels: a character vector of display labels named by the ideal-point
## column each label describes (the list below is written label = column and
## then inverted).
type.labs <- list(
  `Non-Parametric`="ideal.main",
  `Non-Parametric\n(Binarized)`="ideal.main.bin",
  `Non-Parametric\n(Groups Only)`="ideal.groups",
  `Non-Parametric\n(Issue Phrases Only)`="ideal.issues",
  `Parametric\nModel`="ideal.mdl",
  `Non-Parametric\n(Twitter Only)`="ideal.tw",
  `Non-Parametric\n(Instagram Only)`="ideal.ig"
)
type.labs <- setNames(names(type.labs), unlist(type.labs))

## Long table of every ideal-point variant, each scaled (but not centered)
## within variant; `type` is a factor in display order.
d.ideal.dists <- d.brands %>%
  select(
    ideal.main,
    ideal.main.bin,
    ideal.groups,
    ideal.issues,
    ideal.mdl,
    ideal.tw,
    ideal.ig
  ) %>%
  gather(key = "type", value = "value") %>%
  group_by(type) %>%
  mutate(value = scale(value, center = FALSE)) %>%
  mutate(type = factor(type, levels = names(type.labs)))

## One density per variant; dashed blue line = variant mean, black line = zero.
p.ideal.dist <- ggplot(d.ideal.dists, aes(x = value)) +
  facet_grid(type ~ ., scales = "free_y", labeller = labeller(type = type.labs)) +
  ylab("Density") +
  xlab("Standardized (non-centered)\ndistribution") +
  geom_density(alpha = 0.5, color = "grey", fill = "grey") +
  geom_vline(
    data = summarise(group_by(d.ideal.dists, type), mean = mean(value, na.rm = TRUE)),
    aes(xintercept = mean), color = "blue", linetype = 2
  ) +
  geom_vline(xintercept = 0, color = "black", linetype = 1, alpha = 0.6) +
  theme_custom_vertpanel +
  theme(legend.position = "none")

### Grid of correlations

## Same long layout, keeping a copy of the main estimate (`ideal.main.2`) on
## every row so each variant can be plotted against it.
d.ideal.corr <- d.brands %>%
  mutate(ideal.main.2 = ideal.main) %>%
  select(
    ideal.main,
    ideal.main.2,
    ideal.main.bin,
    ideal.groups,
    ideal.issues,
    ideal.mdl,
    ideal.tw,
    ideal.ig
  ) %>%
  gather(key = "type", value = "value", -ideal.main.2) %>%
  group_by(type) %>%
  mutate(
    value = scale(value, center = FALSE),
    ideal.main.2 = scale(ideal.main.2, center = FALSE)
  ) %>%
  mutate(type = factor(type, levels = names(type.labs)))

## Scatter of each variant against the main estimate with a linear (purple)
## and a loess (orange) fit, annotated with the pairwise-complete correlation.
p.ideal.corr <- ggplot(d.ideal.corr, aes(x = ideal.main.2, y = value)) +
  facet_grid(type ~ ., scales = "free", labeller = labeller(type = type.labs)) +
  xlab("Standardized (non-centered) estimate of non-parametric estimate") +
  ylab("Standardized (non-centered) estimate") +
  geom_point(color = "black", alpha = 0.3, shape = 1) +
  geom_smooth(method = "lm", color = "purple", se = FALSE) +
  geom_smooth(method = "loess", color = "orange", se = FALSE) +
  geom_text(
    data = summarise(
      group_by(d.ideal.corr, type),
      r = cor(value, ideal.main.2, use = "pairwise.complete.obs")
    ),
    aes(x = -Inf, y = Inf, label = sprintf("r = %0.2f", r)),
    vjust = 1.75, hjust = -0.25
  ) +
  theme_custom_vertpanel +
  theme(legend.position = "none")

## Side-by-side figure: densities on the left (facet strips hidden), scatter
## plots on the right; the margins pull the two columns together.
p.ideal.cmp <- cowplot::plot_grid(
  p.ideal.dist +
    theme(
      strip.background.y = element_blank(),
      strip.text = element_text(size = 0),
      plot.margin = unit(c(0.1, -0.3, 0.1, 0), "cm")
    ),
  p.ideal.corr +
    theme(plot.margin = unit(c(0.1, 0, 0.1, -2.75), "cm")),
  align = "h"
)

ggsave_v(
  p.ideal.cmp,
  filename = "figures/idealpts/cmp.pdf",
  width = 10, height = 7
)

## 2.5. Jan 6th ----------------------------------------------------------------

## Number of posts matching event-related patterns among posts dated after
## the event.
## NOTE(review): the comparisons are strict (`>`), so posts whose
## timestamp_ymd equals the event date itself are excluded -- confirm intended.
sum(grepl(
  "(insurrection| riot)",
  Brand_Msgs$text[Brand_Msgs$timestamp_ymd > "2021-01-06"],
  ignore.case = TRUE
))
sum(grepl(
  "(capitol|insurrection| riot)",
  Brand_Msgs$text[Brand_Msgs$timestamp_ymd > "2021-01-06"],
  ignore.case = TRUE
))
sum(grepl(
  "George.*Floyd",
  Brand_Msgs$text[Brand_Msgs$timestamp_ymd > "2020-05-25"],
  ignore.case = TRUE
))

## Weekly count of posts matching the Jan 6 pattern, plotted at the first
## timestamp_ymd encountered in each week; the dashed red line marks 2021-01-06.
Brand_Msgs %>%
  mutate(
    timestamp_wk = format(as.Date(timestamp_ymd), "%W-%y"),
    jan6 = grepl("(capitol|insurrection| riot)", text, ignore.case = TRUE)
  ) %>%
  group_by(timestamp_wk) %>%
  summarise(
    jan6 = sum(jan6),
    timestamp_ymd = first(timestamp_ymd)
  ) %>%
  ggplot(aes(x = as.Date(timestamp_ymd), y = jan6)) +
  geom_vline(xintercept = as.Date("2021-01-06"), color = "red", linetype = 2) +
  geom_line() +
  theme_bw()

## 2.6. Covariate distributions -----------------------------------------------

### stakeholders

## Stakeholder covariates, in the order the panels should appear.
stkhl.vars <- c(
  "R_don_share",
  "R_don_share.Board_Member",
  "R_don_share.Managers",
  "R_don_share.Legal",
  "R_don_share.Human_Resources",
  "R_don_share.Top_Exec",
  "R_don_share.Public_Relations",
  "R_don_share.Marketing",
  "R_don_share.Rank_and_File",
  "twitter.foll_ideo_slant",
  "sl.Rep_Pct.2017_02",
  "sl.Rep_Pct.2022_10",
  "hq_pres.REP",
  "zi.pres.REP",
  "sg.pres.REP",
  "cong_house_dw_mean",
  "cong_sen_dw_mean",
  "stkhl.R"
)

## Long table (brand x covariate). `cov1` fixes the panel order; `cov2` is the
## display label, turned into a factor after sorting so that its level order
## follows `cov1`.
d.cov.stkhl <- d.brands %>%
  select(all_of(c("yougov_name", stkhl.vars))) %>%
  gather(key = "cov", value = "val", -yougov_name) %>%
  mutate(
    cov1 = factor(cov, levels = stkhl.vars),
    cov2 = sanitize_var(cov, multi_line = TRUE)
  ) %>%
  arrange(cov1) %>%
  mutate(cov2 = as_factor(cov2))

## One density per covariate, with the mean drawn as a dashed line and printed
## at the top of the panel.
p.cov.stkhl <- ggplot(d.cov.stkhl, aes(x = val)) +
  facet_wrap(~ cov2, scales = "free") +
  geom_density(fill = "gray", color = "gray", alpha = 0.5) +
  scale_x_continuous(name = "") +
  scale_y_continuous(name = "") +
  geom_vline(
    data = summarise(group_by(d.cov.stkhl, cov2), xmean = mean(val, na.rm = TRUE)),
    aes(xintercept = xmean), linetype = 2, alpha = 0.5
  ) +
  geom_label(
    data = summarise(group_by(d.cov.stkhl, cov2), xmean = mean(val, na.rm = TRUE)),
    aes(x = xmean, y = Inf, label = round(xmean, 2)),
    vjust = 1, size = 3
  ) +
  theme_custom +
  theme(strip.text = element_text(size = 8))
ggsave_v(p.cov.stkhl,
         file = "figures/cov_stkhl.pdf",
         width = 8.5, height = 8)

## Same density panels, restricted to covariates whose display label contains
## "%"; the x-axis is fixed to [0, 1] and shown as percentages, and the mean
## label is printed as a percentage.
p.cov.stkhl.pct <- ggplot(
  filter(d.cov.stkhl, grepl("\\%", cov2)),
  aes(x = val)
) +
  facet_wrap(~ cov2, scales = "free_y", nrow = 3) +
  geom_density(fill = "gray", color = "gray", alpha = 0.5) +
  scale_x_continuous(
    name = "",
    limits = c(0, 1),
    labels = scales::percent_format(1)
  ) +
  scale_y_continuous(name = "") +
  geom_vline(
    data = d.cov.stkhl %>%
      filter(grepl("\\%", cov2)) %>%
      group_by(cov2) %>%
      summarise(xmean = mean(val, na.rm = TRUE)),
    aes(xintercept = xmean), linetype = 2, alpha = 0.5
  ) +
  geom_label(
    data = d.cov.stkhl %>%
      filter(grepl("\\%", cov2)) %>%
      group_by(cov2) %>%
      summarise(xmean = mean(val, na.rm = TRUE)),
    aes(x = xmean, y = Inf, label = paste0(100 * round(xmean, 2), "%")),
    vjust = 1, size = 3
  ) +
  theme_custom +
  theme(strip.text = element_text(size = 8))
ggsave_v(p.cov.stkhl.pct,
         file = "figures/cov_stkhl_pct.pdf",
         width = 9, height = 5)


## Density panels for the stakeholder covariates whose display label does NOT
## contain "%". The mean is labelled as a plain rounded number, matching
## p.cov.stkhl: the earlier percentage formatting (100 * mean, "%") was carried
## over from the percentage plot and mislabels covariates that this filter
## has selected precisely because they are not labelled as percentages.
p.cov.stkhl.dir <- d.cov.stkhl %>%
  filter(!grepl("\\%", cov2)) %>%
  ggplot(aes(x = val)) +
  facet_wrap(~ cov2, scales = "free", nrow = 1) +
  geom_density(fill = "gray", color = "gray", alpha = 0.5) +
  scale_x_continuous(name = "") +
  scale_y_continuous(name = "") +
  geom_vline(
    data = d.cov.stkhl %>%
      filter(!grepl("\\%", cov2)) %>%
      group_by(cov2) %>%
      summarise(xmean = mean(val, na.rm = TRUE)),
    aes(xintercept = xmean), lty = 2, alpha = 0.5
  ) +
  geom_label(
    data = d.cov.stkhl %>%
      filter(!grepl("\\%", cov2)) %>%
      group_by(cov2) %>%
      summarise(xmean = mean(val, na.rm = TRUE)),
    aes(x = xmean, y = Inf, label = round(xmean, 2)),
    vjust = 1, size = 3
  ) +
  theme_custom +
  theme(strip.text = element_text(size = 8))
ggsave_v(p.cov.stkhl.dir,
         file = "figures/cov_stkhl_dir.pdf",
         width = 9, height = 2)

### activities

## Activity covariates, in the order the panels should appear.
actv.vars <- c(
  "legis.R_frac",
  "opsec.R_share.org_dollars",
  "opsec.R_cand_share.org_dollars",
  "zippia_empl.Ethnicity.Non-White",
  "zippia_empl.Genders.Female",
  "hrc_rating",
  "gd.Race / Ethnicity.Black or African American.avg_rating",
  "glassdoor_Rating.Diversity and Inclusion",
  "gd.Gender.Women.avg_rating",
  "gd.Sexual Orientation.LGBTQ+.avg_rating",
  "cdp_avg_score",
  "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
  "gjf.n_off.discr",
  "gjf.n_off.labor",
  "gjf.n_off.environ"
)

## Long table (brand x covariate) with every covariate coerced to numeric.
## `cov1` fixes the panel order; `cov2` is the display label, turned into a
## factor after sorting so that its level order follows `cov1`.
d.cov.actv <- d.brands %>%
  select(all_of(c("yougov_name", actv.vars))) %>%
  mutate(across(all_of(actv.vars), as.numeric)) %>%
  gather(key = "cov", value = "val", -yougov_name) %>%
  mutate(
    cov1 = factor(cov, levels = actv.vars),
    cov2 = sanitize_var(cov, multi_line = TRUE)
  ) %>%
  arrange(cov1) %>%
  mutate(cov2 = as_factor(cov2))

## One density per covariate, with the mean drawn as a dashed line and printed
## at the top of the panel.
p.cov.actv <- ggplot(d.cov.actv, aes(x = val)) +
  facet_wrap(~ cov2, scales = "free") +
  geom_density(fill = "gray", color = "gray", alpha = 0.5) +
  scale_x_continuous(name = "") +
  scale_y_continuous(name = "") +
  geom_vline(
    data = summarise(group_by(d.cov.actv, cov2), xmean = mean(val, na.rm = TRUE)),
    aes(xintercept = xmean), linetype = 2, alpha = 0.5
  ) +
  geom_label(
    data = summarise(group_by(d.cov.actv, cov2), xmean = mean(val, na.rm = TRUE)),
    aes(x = xmean, y = Inf, label = round(xmean, 2)),
    vjust = 1, size = 3
  ) +
  theme_custom +
  theme(strip.text = element_text(size = 7))
ggsave_v(p.cov.actv,
         file = "figures/cov_actv.pdf",
         width = 8.5, height = 8)

# 3.) VISUALIZE CONGRESS ------------------------------------------------------

## Partisan phrases -------------------------------------------------------

## Corner anchors for plot annotations. Rows, in order:
##   1 = left-bottom, 2 = left-top, 3 = right-bottom, 4 = right-top.
## hjustvar: left corners use -.25 (more negative moves the text further
##   right); right corners use 1.25 (more positive moves it further left).
## vjustvar: bottom corners use -1 (more negative moves the text further up);
##   top corners use 2 (more positive moves it further down).
annot <- data.frame(
  xpos = c(-Inf, -Inf, Inf, Inf),
  ypos = c(-Inf, Inf, -Inf, Inf),
  hjustvar = c(-.25, -.25, 1.25, 1.25),
  vjustvar = c(-1, 2, -1, 2)
)

## Show bigram features with spaces instead of underscores. This overwrites
## the feature column of the loaded object in place.
scaled_text$keyness_partisan$feature <- gsub(
  "_", " ",
  scaled_text$keyness_partisan$feature
)

## Keyness plot of the top 30 phrases per side, with direction annotations in
## the bottom-left and top-right corners.
p.congress.keyness <- textplot_keyness(
  scaled_text$keyness_partisan,
  margin = 0.8, color = c("red", "blue"), n = 30
) +
  annotate(
    "text",
    label = expression(NULL %<-% bold("Democrats say more")),
    x = annot$xpos[1], y = annot$ypos[1],
    hjust = annot$hjustvar[1], vjust = annot$vjustvar[1],
    fontface = "bold"
  ) +
  annotate(
    "text",
    label = expression(bold("Republicans say more") %->% NULL),
    x = annot$xpos[4], y = annot$ypos[4],
    hjust = annot$hjustvar[4], vjust = annot$vjustvar[4],
    fontface = "bold"
  ) +
  xlab(TeX("$\\chi^2$ of bigram count in Congressional social media")) +
  theme(legend.position = "none")
ggsave_v(p.congress.keyness,
         file = "figures/text/congress_bigrams_partisan.pdf",
         height = 9, width = 5)

# 4.) VISUALIZE TEXT ----------------------------------------------------------

## By # mentions ----------------------------------------------------------

## Label p-values by the smallest conventional threshold they fall below
## ("0.001", "0.01", "0.05", "0.1"). Values that are not below 0.1 (including
## NA) are returned as their value rounded to two decimals, as character.
bin_pval <- function(.) {
  rounded <- as.character(round(., 2))
  dplyr::case_when(
    . < 0.001 ~ "0.001",
    . < 0.01  ~ "0.01",
    . < 0.05  ~ "0.05",
    . < 0.1   ~ "0.1",
    TRUE      ~ rounded
  )
}

## Phrase scatterplot (chi2 vs. number of mentions) for all phrases mentioned
## more than MIN_BIGRAM_COUNT times. The blue annotation reports a one-sample
## t-test of the chi2 values against 0, with each phrase's chi2 repeated once
## per mention.
p.text.mentions.overall <- d.text %>%
  mutate(lbl = destem_text(feature)) %>%
  filter(n_mentions > MIN_BIGRAM_COUNT) %>%
  partisan_phrase_scatterplot(
    x.col = "chi2", y.col = "n_mentions", size.col = "n_mentions",
    lbl.col = "lbl", color.col = "color",
    x.lab = TeX("Scaled $\\chi^2$ value of phrase in Congressional social media ($\\gamma_j$)"),
    max.overlaps = 20, flag.size = .10, text.size = 4, flag.thin = TRUE,
    min.segment.length = 2,
    annot.top.left = expression(NULL %<-% bold("Democrats say more")),
    annot.top.right = expression(bold("Republicans say more") %->% NULL)
  ) +
  scale_y_continuous(
    name = "Number of mentions",
    trans = scales::pseudo_log_trans(base = 10),
    breaks = c(1, 10, 100, 1000, 2000)
  ) +
  facet_wrap(~ "Overall") +
  annotate(
    "text",
    x = -Inf, y = MIN_BIGRAM_COUNT, hjust = -0.1, color = "blue",
    label = local({
      tt <- t.test(rep(d.text$chi2, d.text$n_mentions), mu = 0)
      paste0("t = ", round(tt$statistic[[1]], 1), ", ",
             "p < ", bin_pval(tt$p.value))
    })
  ) +
  theme_custom +
  theme(legend.position = "none")

## Log mentions regressed on chi2, and on an indicator for chi2 < 0.
summary(lm(log(n_mentions + 1) ~ chi2, data = d.text))
summary(lm(log(n_mentions + 1) ~ I(chi2 < 0), data = d.text))

## Phrase scatterplots for the "issues" and "groups" phrase types, one facet
## each. Each facet is annotated with a one-sample t-test of chi2 against 0,
## with each phrase's chi2 repeated once per mention.
## `mu = 0` is now passed to t.test() itself; it used to be handed to with(),
## which silently ignores it (the result was only right because 0 is
## t.test()'s default).
## NOTE(review): hjust/vjust/color are given inside aes(), so "blue" goes
## through the plot's colour scale instead of being a literal colour -- confirm
## that this renders as intended.
p.text.mentions.by_type.1 <- d.text %>%
  filter(type %in% c("issues", "groups")) %>%
  mutate(lbl = destem_text(feature),
         type = stringr::str_to_title(type)) %>%
  filter(n_mentions > MIN_BIGRAM_COUNT) %>%
  partisan_phrase_scatterplot(x.col = "chi2", y.col = "n_mentions", size.col = "n_mentions", lbl.col = "lbl", color.col = "color",
                              x.lab = TeX("Scaled $\\chi^2$ value of phrase in Congressional social media ($\\gamma_j$)"),
                              max.overlaps = 12, flag.size = .2, text.size = 2.5, min.segment.length = 7) +
  scale_y_continuous(name = "Number of mentions", trans = scales::pseudo_log_trans(base = 10), breaks = c(1,10,100,1000,2000)) +
  facet_wrap(~ type) +
  geom_text(data = bind_rows(
    t.test(with(subset(d.text, subset = type == "issues"), rep(chi2, n_mentions)), mu = 0) %>%
      with(., data.frame(type = "Issues",
                         label = paste0("t = ", round(statistic[[1]], 1),", ",
                                        "p < ", bin_pval(p.value)))),
    t.test(with(subset(d.text, subset = type == "groups"), rep(chi2, n_mentions)), mu = 0) %>%
      with(., data.frame(type = "Groups",
                         label = paste0("t = ", round(statistic[[1]], 1),", ",
                                        "p < ", bin_pval(p.value))))
  ), aes(x = -Inf, y = MIN_BIGRAM_COUNT, hjust = -0.1, vjust = 0.1, color = "blue", label = label)) +
  theme_custom +
  theme(legend.position = "none")

## Log mentions regressed on chi2, by type.
summary(lm(log(n_mentions+1) ~ chi2, data = d.text, subset = type == "issues"))
summary(lm(log(n_mentions+1) ~ chi2, data = d.text, subset = type == "groups"))

## Log mentions regressed on an indicator for chi2 < 0, by type.
summary(lm(log(n_mentions+1) ~ I(chi2<0), data = d.text, subset = type == "issues"))
summary(lm(log(n_mentions+1) ~ I(chi2<0), data = d.text, subset = type == "groups"))

## Mention-weighted t-tests of chi2 against 0, by type.
t.test(with(subset(d.text, subset = type == "issues"), rep(chi2, n_mentions)), mu = 0)
t.test(with(subset(d.text, subset = type == "groups"), rep(chi2, n_mentions)), mu = 0)

## Scatterplot of partisan phrases for the "individuals", "observances" and
## "expressions" phrase types: chi2 value (x) against number of mentions (y),
## one facet per type. Each facet is annotated with a one-sample t-test of
## chi2 against 0 in which every phrase is repeated n_mentions times.
##
## The annotation frames are built in a single lapply(). `mu = 0` is now an
## argument of t.test() itself; previously it sat outside the t.test() call
## and was swallowed by with(), which only worked because 0 is the default.
p.text.mentions.by_type.2 <- d.text %>%
  filter(type %in% c("individuals", "observances", "expressions")) %>%
  mutate(lbl = destem_text(feature),
         type = stringr::str_to_title(type)) %>%
  filter(n_mentions > MIN_BIGRAM_COUNT) %>%
  partisan_phrase_scatterplot(x.col = "chi2", y.col = "n_mentions", size.col = "n_mentions", lbl.col = "lbl", color.col = "color",
                              x.lab = TeX("Scaled $\\chi^2$ value of phrase in Congressional social media ($\\gamma_j$)"),
                              max.overlaps = 16, flag.size = .2, text.size = 2.75, flag.thin = TRUE, min.segment.length = 7) +
  scale_y_continuous(name = "Number of mentions", trans = scales::pseudo_log_trans(base = 10), breaks = c(1,10,100,1000,2000)) +
  facet_wrap(~ type) +
  geom_text(
    data = bind_rows(lapply(
      c("individuals", "observances", "expressions"),
      function(phrase.type) {
        ## The test uses all phrases of this type in d.text (no mention-count
        ## threshold); which() drops rows whose type is NA.
        rows <- d.text[which(d.text$type == phrase.type), ]
        tt   <- t.test(rep(rows$chi2, rows$n_mentions), mu = 0)
        ## `type` must match the title-cased facet labels created above
        data.frame(type  = stringr::str_to_title(phrase.type),
                   label = paste0("t = ", round(tt$statistic[[1]], 1), ", ",
                                  "p < ", bin_pval(tt$p.value)))
      }
    )),
    aes(x = -Inf, y = MIN_BIGRAM_COUNT, hjust = -0.1, vjust = 0.1, color = "blue", label = label)
  ) +
  theme_custom +
  theme(legend.position = "none")

## Phrase-level models for the "individuals", "observances" and "expressions"
## phrase types. Outcome: log(n_mentions + 1); predictor: the phrase's chi2 value.
summary(lm(log(n_mentions+1) ~ chi2, data = d.text, subset = type == "individuals"))
summary(lm(log(n_mentions+1) ~ chi2, data = d.text, subset = type == "observances"))
summary(lm(log(n_mentions+1) ~ chi2, data = d.text, subset = type == "expressions"))

## Same outcome, regressed on an indicator for a negative chi2 value.
summary(lm(log(n_mentions+1) ~ I(chi2 < 0), data = d.text, subset = type == "individuals"))
summary(lm(log(n_mentions+1) ~ I(chi2 < 0), data = d.text, subset = type == "observances"))
summary(lm(log(n_mentions+1) ~ I(chi2 < 0), data = d.text, subset = type == "expressions"))

## One-sample t-tests against 0 on chi2, where each phrase's chi2 value is
## repeated n_mentions times (i.e. the sample is weighted by mentions).
t.test(with(subset(d.text, subset = type == "individuals"), rep(chi2, n_mentions)), mu = 0)
t.test(with(subset(d.text, subset = type == "observances"), rep(chi2, n_mentions)), mu = 0)
t.test(with(subset(d.text, subset = type == "expressions"), rep(chi2, n_mentions)), mu = 0)

## Assemble the "by mentions" figure: the two by-type rows are stacked first,
## then placed under the overall panel. Every individual scatterplot gets the
## same x-axis breaks and limits so the panels are comparable.
p.text.mentions.by_type  <- cowplot::plot_grid(
  p.text.mentions.by_type.1 + scale_x_continuous(breaks = c(-300, -100, 0, 100, 300), limits = c(-300, 300)),
  p.text.mentions.by_type.2 + scale_x_continuous(breaks = c(-300, -100, 0, 100, 300), limits = c(-300, 300)),
  nrow = 2
)
## `p.text.mentions.by_type` is an already-composed cowplot grid, so no x scale
## is added to it here: a scale on the grid object only replaces the grid
## canvas' own scale (with a "Scale for x is already present" message) and
## does not reach the plots inside it, which were rescaled above.
p.text.mentions.combined <- cowplot::plot_grid(
  p.text.mentions.overall + scale_x_continuous(breaks = c(-300, -100, 0, 100, 300), limits = c(-300, 300)),
  p.text.mentions.by_type,
  nrow = 2
)

ggsave_v(p.text.mentions.combined,
         file = "figures/text/brands_partisan1.pdf",
         height=8, width=10)

## By # brands ------------------------------------------------------------
## (copied and pasted from above)

## Overall panel of the "by number of brands" figure: phrases with more than
## MIN_PARTISAN_BIGRAMS mentions, chi2 value on x, number of brands on y
## (pseudo-log axis), all shown under a single "Overall" facet strip.
p.text.brands.overall <- d.text %>%
  mutate(lbl = destem_text(feature)) %>%
  filter(n_mentions > MIN_PARTISAN_BIGRAMS) %>%
  partisan_phrase_scatterplot(
    x.col     = "chi2",
    y.col     = "n_brands",
    size.col  = "n_brands",
    lbl.col   = "lbl",
    color.col = "color",
    x.lab     = TeX("Scaled $\\chi^2$ value of phrase in Congressional social media"),
    max.overlaps       = 20,
    flag.size          = .05,
    text.size          = 4,
    flag.thin          = TRUE,
    min.segment.length = 2,
    annot.top.left     = expression(NULL %<-% bold("Democrats say more")),
    annot.top.right    = expression(bold("Republicans say more") %->% NULL)
  ) +
  scale_y_continuous(
    name   = "Number of brands",
    trans  = scales::pseudo_log_trans(base = 10),
    breaks = c(1, 10, 100, 1000, 2000)
  ) +
  facet_wrap(~ "Overall")

## By-type panels of the "by number of brands" figure, row 1: issues and groups.
p.text.brands.by_type.1 <- d.text %>%
  filter(type %in% c("issues", "groups")) %>%
  mutate(
    lbl  = destem_text(feature),
    type = stringr::str_to_title(type)
  ) %>%
  filter(n_mentions > MIN_PARTISAN_BIGRAMS) %>%
  partisan_phrase_scatterplot(
    x.col     = "chi2",
    y.col     = "n_brands",
    size.col  = "n_brands",
    lbl.col   = "lbl",
    color.col = "color",
    x.lab     = TeX("Scaled $\\chi^2$ value of phrase in Congressional social media"),
    max.overlaps       = 12,
    flag.size          = .1,
    text.size          = 2.5,
    min.segment.length = 7
  ) +
  scale_y_continuous(
    name   = "Number of brands",
    trans  = scales::pseudo_log_trans(base = 10),
    breaks = c(1, 10, 100, 1000, 2000)
  ) +
  facet_wrap(~ type)

## Row 2: individuals, observances and expressions.
p.text.brands.by_type.2 <- d.text %>%
  filter(type %in% c("individuals", "observances", "expressions")) %>%
  mutate(
    lbl  = destem_text(feature),
    type = stringr::str_to_title(type)
  ) %>%
  filter(n_mentions > MIN_PARTISAN_BIGRAMS) %>%
  partisan_phrase_scatterplot(
    x.col     = "chi2",
    y.col     = "n_brands",
    size.col  = "n_brands",
    lbl.col   = "lbl",
    color.col = "color",
    x.lab     = TeX("Scaled $\\chi^2$ value of phrase in Congressional social media"),
    max.overlaps       = 16,
    flag.size          = .1,
    text.size          = 2.75,
    flag.thin          = TRUE,
    min.segment.length = 7
  ) +
  scale_y_continuous(
    name   = "Number of brands",
    trans  = scales::pseudo_log_trans(base = 10),
    breaks = c(1, 10, 100, 1000, 2000)
  ) +
  facet_wrap(~ type)

## Stack the two by-type rows, then put the overall panel on top of them.
p.text.brands.by_type <- cowplot::plot_grid(
  p.text.brands.by_type.1,
  p.text.brands.by_type.2,
  nrow = 2
)
p.text.brands.combined <- cowplot::plot_grid(
  p.text.brands.overall,
  p.text.brands.by_type,
  nrow = 2
)

ggsave_v(
  p.text.brands.combined,
  file   = "figures/text/brands_partisan2.pdf",
  height = 8,
  width  = 10
)

## By type x context (heatmap) --------------------------------------------

## Long table of partisan phrase x post-context rows used by all heatmaps
## below. Keeps a selected set of post contexts (`text_type`), attaches each
## phrase's manually coded type (`entity_type`) from `partisan_bigrams_bytype`,
## keeps five phrase types, and maps raw context codes to display labels.
## `TRUE` is spelled out instead of `T`, which is an ordinary (reassignable)
## variable rather than a constant.
d.type.ctxt.heatmap.0 <- scaled_text$dfm_partisan_x_texttype_df_long %>%
  # filter(!grepl("busi", feature)) %>%
  ### selected contexts
  filter(grepl("(belief|celeb|charit|donat|goal|info|honor|invest|philanth|pride|stanc|support|oppos|values|volunt)", text_type, ignore.case = TRUE)) %>%
  ### features use "_" as separator here; the type table is joined on the spaced form
  mutate(feature = gsub("_"," ",feature)) %>%
  ### join with bigram type
  inner_join(distinct(partisan_bigrams_bytype, entity_type=type, feature),
             by = "feature") %>%
  ### selected types
  filter(grepl("(group|expression|individual|issue|observance)", entity_type, ignore.case = TRUE)) %>%
  filter(!is.na(text_type), text_type != "", !is.na(entity_type)) %>%
  ### sanitize: title-case both labels, then collapse contexts to display names
  ### (first matching pattern wins; unmatched contexts keep their title-cased code)
  mutate(entity_type = str_to_title(entity_type),
         text_type = str_to_title(gsub("_","/",text_type)),
         text_type = case_when(
           grepl("Believe", text_type) ~ "Belief",
           grepl("Celeb", text_type)   ~ "Celebration",
           grepl("Donat", text_type)  ~ "Donation",
           grepl("Goals", text_type)  ~ "Goals/Missions",
           grepl("Philan", text_type)  ~ "Philanthropy",
           grepl("Info", text_type)    ~ "Information",
           grepl("Invest", text_type)  ~ "Investment",
           # grepl("Mtwabp", text_type)  ~ '"Make the World\na Better Place"',
           grepl("Oppose", text_type)  ~ "Position: Oppose",
           grepl("Support", text_type) ~ "Position: Support",
           grepl("Volun", text_type) ~ "Voluntarism",
           # grepl("Market", text_type)  ~ "Market Communications",
           grepl("Value", text_type)   ~ "Values",
           TRUE ~ text_type))

## Cell-level data for the overall heatmap: total count per
## (post context, phrase type) pair, a printable label for every cell, a fixed
## display order for phrase types, and the label colour (`c`).
##
## Every cell is labelled. The earlier top-quartile-only `count_lbl` was
## overwritten on the very next line and has been removed. `format = "d"`
## keeps large counts out of scientific notation when `count` is stored as a
## double.
d.type.ctxt.heatmap <- d.type.ctxt.heatmap.0 %>%
  group_by(text_type, entity_type) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(count_lbl = formatC(count, format = "d", big.mark = ",")) %>%
  mutate(entity_type = factor(entity_type,
                              levels = rev(c("Groups", "Issues", "Expressions", "Individuals", "Observances")))) %>%
  ## black text on the very top cells (percent rank >= 0.99), white elsewhere
  mutate(c = ifelse(percent_rank(count) >= 0.99, "black", "white"))

## Marginal totals of the heatmap, largest first, with each margin's share of
## the grand total: first by post context ...
d.type.ctxt.heatmap %>%
  group_by(text_type) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(pct = prop.table(count)) %>%
  arrange(desc(count))

## ... then by phrase type.
d.type.ctxt.heatmap %>%
  group_by(entity_type) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(pct = prop.table(count)) %>%
  arrange(desc(count))

## Heatmap of raw co-occurrence counts: post context (x) by phrase type (y),
## tile fill = count, cell text = `count_lbl` drawn in the per-cell colour `c`.
## Uses `panel.spacing.x/.y`; the `panel.margin.*` theme elements they replace
## have been deprecated since ggplot2 2.2.0.
p.type.ctxt.heatmap <- d.type.ctxt.heatmap %>%
  ggplot(aes(x=text_type, y=entity_type, fill= count)) +
  geom_tile(color="white", size=0.1) +
  geom_text(aes(label = count_lbl, color = c), fontface = "plain", size = 6) +
  # scale_fill_distiller(name="Co-occurring posts:") +
  ## the lowest legend break sits at 1 but is labelled 0
  scale_fill_viridis_c(name="Co-occurrences:",
                       breaks=c(1, 400, 800, 1200, 1600),
                       labels=c(0, 400, 800, 1200, 1600)) +
  ## `c` already holds literal colour names
  scale_color_identity() +
  ggthemes::theme_tufte(base_family="Helvetica") +
  xlab("Type of social media post (context)") +
  ylab("Type of partisan phrase") +
  theme(axis.ticks.y=element_blank(),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.text=element_text(hjust=0),
        axis.text.x=element_text(angle=25, hjust=1),
        panel.spacing.x=unit(0.5, "cm"),
        panel.spacing.y=unit(1, "cm"),
        legend.title=element_text(size=12),
        legend.title.align=1,
        legend.text=element_text(size=12),
        legend.position="bottom",
        legend.key.size=unit(0.2, "cm"),
        legend.key.width=unit(1, "cm"))
ggsave_v(p.type.ctxt.heatmap, file ="figures/text/brands_partisan_context_heatmap.pdf", height=4, width=9)


## Row-normalised version of the heatmap: within each phrase type, counts are
## converted to shares across post contexts and labelled as whole percentages.
## Uses `panel.spacing.x/.y`; the `panel.margin.*` theme elements they replace
## have been deprecated since ggplot2 2.2.0.
p.type.ctxt.heatmap2 <- d.type.ctxt.heatmap %>%
  group_by(entity_type) %>%
  ## share of this phrase type's total that falls in each context
  mutate(count = count/sum(count)) %>%
  mutate(count_lbl = paste0(100*round(count, 2),"%")) %>%
  ggplot(aes(x=text_type, y=entity_type, fill= count)) +
  geom_tile(color="white", size=0.1) +
  ## `c` was computed on the raw counts, before normalisation
  geom_text(aes(label = count_lbl, color = c), fontface = "plain", size = 6) +
  # scale_fill_distiller(name="Co-occurring posts:") +
  scale_fill_viridis_c(name="Co-occurrences:") +
  scale_color_identity() +
  ggthemes::theme_tufte(base_family="Helvetica") +
  xlab("Type of social media post (context)") +
  ylab("Type of partisan phrase") +
  theme(axis.ticks.y=element_blank(),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.text=element_text(hjust=0),
        axis.text.x=element_text(angle=25, hjust=1),
        panel.spacing.x=unit(0.5, "cm"),
        panel.spacing.y=unit(1, "cm"),
        legend.title=element_text(size=12),
        legend.title.align=1,
        legend.text=element_text(size=12),
        legend.position="bottom",
        legend.key.size=unit(0.2, "cm"),
        legend.key.width=unit(1, "cm"))
ggsave_v(p.type.ctxt.heatmap2, file ="figures/text/brands_partisan_context_heatmap2.pdf", height=4, width=9)

## By type x context x PID (heatmap) --------------------------------------

## Context x phrase-type heatmap restricted to phrases with chi2 < 0 (saved
## with the "_D" suffix). Only cells above the 75th percentile rank of counts
## get a text label.
d.type.ctxt.heatmap.d <- d.type.ctxt.heatmap.0 %>%
  filter(chi2 < 0) %>%
  group_by(text_type, entity_type) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(count_lbl = case_when(percent_rank(count) > 0.75 ~ as.character(count), TRUE ~ "")) %>%
  mutate(entity_type = factor(entity_type,
                              levels = rev(c("Groups", "Issues", "Expressions", "Individuals", "Observances")))) %>%
  ## black text on the very top cells (percent rank >= 0.99), white elsewhere
  mutate(c = ifelse(percent_rank(count) >= 0.99, "black", "white"))

## Marginal totals and shares by post context ...
d.type.ctxt.heatmap.d %>%
  group_by(text_type) %>%
  summarise(count = sum(count)) %>%
  mutate(pct = count/sum(count)) %>%
  arrange(-count)

## ... and by phrase type.
d.type.ctxt.heatmap.d %>%
  group_by(entity_type) %>%
  summarise(count = sum(count)) %>%
  mutate(pct = count/sum(count)) %>%
  arrange(-count)

## Uses `panel.spacing.x/.y`; the `panel.margin.*` theme elements they replace
## have been deprecated since ggplot2 2.2.0.
p.type.ctxt.heatmap.d <- d.type.ctxt.heatmap.d %>%
  ggplot(aes(x=text_type, y=entity_type, fill= count)) +
  geom_tile(color="white", size=0.1) +
  geom_text(aes(label = count_lbl, color = c), fontface = "plain", size = 6) +
  # scale_fill_distiller(name="Co-occurring posts:") +
  scale_fill_viridis_c(name="Co-occurrences:") +
  scale_color_identity() +
  ggthemes::theme_tufte(base_family="Helvetica") +
  xlab("Type of social media post (context)") +
  ylab("Type of partisan phrase") +
  theme(axis.ticks.y=element_blank(),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.text=element_text(hjust=0),
        axis.text.x=element_text(angle=25, hjust=1),
        panel.spacing.x=unit(0.5, "cm"),
        panel.spacing.y=unit(1, "cm"),
        legend.title=element_text(size=12),
        legend.title.align=1,
        legend.text=element_text(size=12),
        legend.position="bottom",
        legend.key.size=unit(0.2, "cm"),
        legend.key.width=unit(1, "cm"))
ggsave_v(p.type.ctxt.heatmap.d, file ="figures/text/brands_partisan_context_heatmap_D.pdf", height=4, width=9)

# 

## Context x phrase-type heatmap restricted to phrases with chi2 > 0 (saved
## with the "_R" suffix). Only cells above the 75th percentile rank of counts
## get a text label.
d.type.ctxt.heatmap.r <- d.type.ctxt.heatmap.0 %>%
  filter(chi2 > 0) %>%
  group_by(text_type, entity_type) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(count_lbl = case_when(percent_rank(count) > 0.75 ~ as.character(count), TRUE ~ "")) %>%
  mutate(entity_type = factor(entity_type,
                              levels = rev(c("Groups", "Issues", "Expressions", "Individuals", "Observances")))) %>%
  ## black text on the very top cells (percent rank >= 0.99), white elsewhere
  mutate(c = ifelse(percent_rank(count) >= 0.99, "black", "white"))

## Marginal totals and shares by post context ...
d.type.ctxt.heatmap.r %>%
  group_by(text_type) %>%
  summarise(count = sum(count)) %>%
  mutate(pct = count/sum(count)) %>%
  arrange(-count)

## ... and by phrase type.
d.type.ctxt.heatmap.r %>%
  group_by(entity_type) %>%
  summarise(count = sum(count)) %>%
  mutate(pct = count/sum(count)) %>%
  arrange(-count)

## Uses `panel.spacing.x/.y`; the `panel.margin.*` theme elements they replace
## have been deprecated since ggplot2 2.2.0.
p.type.ctxt.heatmap.r <- d.type.ctxt.heatmap.r %>%
  ggplot(aes(x=text_type, y=entity_type, fill= count)) +
  geom_tile(color="white", size=0.1) +
  geom_text(aes(label = count_lbl, color = c), fontface = "plain", size = 6) +
  # scale_fill_distiller(name="Co-occurring posts:") +
  scale_fill_viridis_c(name="Co-occurrences:") +
  scale_color_identity() +
  ggthemes::theme_tufte(base_family="Helvetica") +
  xlab("Type of social media post (context)") +
  ylab("Type of partisan phrase") +
  theme(axis.ticks.y=element_blank(),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.text=element_text(hjust=0),
        axis.text.x=element_text(angle=25, hjust=1),
        panel.spacing.x=unit(0.5, "cm"),
        panel.spacing.y=unit(1, "cm"),
        legend.title=element_text(size=12),
        legend.title.align=1,
        legend.text=element_text(size=12),
        legend.position="bottom",
        legend.key.size=unit(0.2, "cm"),
        legend.key.width=unit(1, "cm"))
ggsave_v(p.type.ctxt.heatmap.r, file ="figures/text/brands_partisan_context_heatmap_R.pdf", height=4, width=9)


## By date ----------------------------------------------------------------

## Dated events drawn as dashed vertical markers on the over-time plots.
## `date` is kept as a character string and converted with as.Date() where it
## is plotted; the trailing spaces in `lbl` are part of the label text.
##
## Holiday markers that are currently switched off:
  # data.frame(date=as.character(timeDate::holiday(STUDY_YEARS, "USMemorialDay")),
  #            lbl="Memorial Day"),
  # data.frame(date=as.character(timeDate::holiday(STUDY_YEARS, "USVeteransDay")),
  #            lbl="Veteran's Day"),
  # data.frame(date=as.character(timeDate::holiday(STUDY_YEARS, "USMLKingsBirthday")),
  #            lbl="MLK Day"),
  # data.frame(date=as.character(timeDate::holiday(STUDY_YEARS, "USElectionDay")),
  #            lbl="Election Day"),
  # data.frame(date=as.character(timeDate::holiday(STUDY_YEARS, "USIndependenceDay")),
  #            lbl="July 4th"),
  # data.frame(date=as.character("2020-03-13"), lbl="Breonna Taylor"),
event_markers <- data.frame(
  date = c("2020-05-25", "2021-01-06"),
  lbl  = c("George Floyd   ", "Jan 6th   ")
)

### Average signal across brands over time ----
## Weekly series of `chi2` from `d.wk`, with a smoother, the event markers and
## a y-axis that is symmetric around zero. Positive values are labelled
## "Republicans", all others "Democrats".
##
## A bare `scale_y_continuous()` that used to precede the limited one has been
## dropped: ggplot2 keeps only the last scale per aesthetic, so it had no
## effect beyond a "Scale for y is already present" message.
p.wk <- d.wk %>%
  filter(!is.na(chi2)) %>%
  mutate(col = ifelse(chi2 > 0, "Republicans", "Democrats")) %>%
  ggplot(aes(x=date, y=chi2)) +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5) +
  ## group = 1 keeps a single connected line even though colour varies along it
  geom_line(aes(color = col, group = 1)) +
  geom_vline(data=event_markers,
             aes(xintercept=as.Date(date)), colour="black", lty=2, size=0.5, alpha=0.8, inherit.aes=FALSE) +
  geom_text(data=event_markers, aes(x = as.Date(date), label=lbl),
            y = Inf, hjust = 1.15, vjust=-0.5, size=4, angle=90, inherit.aes=FALSE) +
  stat_smooth(color = "purple") +
  scale_x_date(date_labels = '%Y',breaks = '24 months', limits = c(min(d.wk$date),max(d.wk$date))) +
  ## symmetric limits: +/- the largest absolute weekly value
  scale_y_continuous(limits = c(-1, 1) * max(abs(d.wk$chi2), na.rm = TRUE)) +
  facet_wrap(~ "Overall") +
  scale_colour_manual(values = c("blue","red"), name="Speech more associated with:") +
  annotate("text", label = expression(NULL %<-% bold("More Dem.")),
           x = min(d.wk$date), y = -Inf, hjust = -.05, size = 2.5, fontface = "bold", angle = 90) +
  annotate("text", label = expression(bold("More Rep." %->% NULL)),
           x = min(d.wk$date), y = Inf, hjust = 1.05, size = 2.5, fontface = "bold", angle = 90) +
  xlab("") +
  ylab("Average of partisan signals") +
  theme_custom +
  theme(legend.position="none",
        axis.text.x=element_text(size=8))

### Counts of partisan phrases by type over time ----
## Weekly `pct_pty_posts` by party for the "groups" and "issues" phrase types,
## one facet per type, with the event markers. Missing weekly values are
## plotted as 0. The legend is hidden on this row.
## `labels =` and `accuracy =` are spelled out rather than relying on partial
## (`label`) and positional argument matching.
p.partisan.type.wk.1 <- d.partisan.type.wk %>%
  # full_join(expand(d.partisan.type.wk, date, party, type)) %>%
  mutate(pct_pty_posts = ifelse(is.na(pct_pty_posts), 0, pct_pty_posts)) %>%
  filter(tolower(type) %in% c("groups","issues")) %>%
  ggplot(aes(x=date, y=pct_pty_posts)) +
  geom_vline(data=event_markers,
             aes(xintercept=as.Date(date)), colour="black", lty=2, size=0.5, alpha=0.8, inherit.aes=FALSE) +
  geom_line(aes(colour=party)) +
  facet_wrap(~ type, ncol=2) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_colour_manual(values = c("blue","red"), labels = c("Democrats", "Republicans"), name="Speech more associated with:") +
  # geom_text(data=event_markers, aes(x = as.Date(date), label=lbl),
  #           y = Inf, hjust = 1.15, vjust=-0.5, size=1.5, angle=90, inherit.aes=FALSE, fontface="plain") +
  xlab("") + ylab("% of weekly posts") +
  theme_custom +
  theme(legend.position="none",
        axis.text.x=element_text(size=8))

## Weekly `pct_pty_posts` by party for the "individuals", "observances" and
## "expressions" phrase types, one facet per type, with the event markers.
## Missing weekly values are plotted as 0. This row carries the legend.
## `labels =` and `accuracy =` are spelled out rather than relying on partial
## (`label`) and positional argument matching.
p.partisan.type.wk.2 <- d.partisan.type.wk %>%
  # full_join(expand(d.partisan.type.wk, date, party, type)) %>%
  mutate(pct_pty_posts = ifelse(is.na(pct_pty_posts), 0, pct_pty_posts)) %>%
  filter(tolower(type) %in% c("individuals", "observances", "expressions")) %>%
  ggplot(aes(x=date, y=pct_pty_posts)) +
  geom_vline(data=event_markers,
             aes(xintercept=as.Date(date)), colour="black", lty=2, size=0.5, alpha=0.8, inherit.aes=FALSE) +
  geom_line(aes(colour=party)) +
  facet_wrap(~ type, ncol=3) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_colour_manual(values = c("blue","red"), labels = c("Democrats", "Republicans"), name="Speech more associated with:") +
  # geom_text(data=event_markers, aes(x = as.Date(date), label=lbl),
  #           y = Inf, hjust = 1.15, vjust=-0.5, size=1.5, angle=90, inherit.aes=FALSE, fontface="plain") +
  xlab("") + ylab("% of weekly posts") +
  theme_custom +
  theme(legend.position="bottom",
        axis.text.x=element_text(size=8))

### Counts of partisan phrases by type over time (% of brands) ----
## Brand counts over time by party for the "groups" and "issues" phrase
## types. The data are first completed to every timestamp x party x type
## combination so that combinations with no brands are plotted as 0.
##
## The y-axis shows counts ("Number of brands"). The `pct_brand_posts` column
## that used to be computed here was discarded by the summarise() below and
## never plotted, so it has been removed. `.groups = "drop"` returns an
## ungrouped table and silences the regrouping message.
p.partisan.type.brands.wk.1 <- d.partisan.type.brand.wk %>%
  full_join(expand(d.partisan.type.brand.wk, timestamp_ymd, party, type)) %>%
  mutate(count_brands = ifelse(is.na(count_brands), 0, count_brands)) %>%
  filter(tolower(type) %in% c("groups","issues"), !is.na(party)) %>%
  mutate(date = as.Date(timestamp_ymd)) %>%
  mutate(type = stringr::str_to_title(type)) %>%
  group_by(date, type, party) %>%
  summarise(count_brands = sum(count_brands), .groups = "drop") %>%
  ggplot(aes(x=date, y=count_brands)) +
  geom_vline(data=event_markers,
             aes(xintercept=as.Date(date)), colour="black", lty=2, size=0.5, alpha=0.8, inherit.aes=FALSE) +
  geom_line(aes(colour=party)) +
  facet_wrap(~ type, ncol=2) +
  # scale_y_continuous(label = scales::percent_format(1)) +
  scale_colour_manual(values = c("blue","red"), labels = c("Democrats", "Republicans"), name="Speech more associated with:") +
  # geom_text(data=event_markers, aes(x = as.Date(date), label=lbl),
  #           y = Inf, hjust = 1.15, vjust=-0.5, size=1.5, angle=90, inherit.aes=FALSE, fontface="plain") +
  xlab("") + ylab("Number of brands") +
  theme_custom +
  theme(legend.position="none",
        axis.text.x=element_text(size=8))

## Brand counts over time by party for the "individuals", "observances" and
## "expressions" phrase types. The data are first completed to every
## timestamp x party x type combination so that combinations with no brands
## are plotted as 0. This row carries the legend.
##
## The y-axis shows counts ("Number of brands"). The `pct_brand_posts` column
## that used to be computed here was discarded by the summarise() below and
## never plotted, so it has been removed. `.groups = "drop"` returns an
## ungrouped table and silences the regrouping message.
p.partisan.type.brands.wk.2 <- d.partisan.type.brand.wk %>%
  full_join(expand(d.partisan.type.brand.wk, timestamp_ymd, party, type)) %>%
  mutate(count_brands = ifelse(is.na(count_brands), 0, count_brands)) %>%
  filter(tolower(type) %in% c("individuals", "observances", "expressions"), !is.na(party)) %>%
  mutate(date = as.Date(timestamp_ymd)) %>%
  mutate(type = stringr::str_to_title(type)) %>%
  group_by(date, type, party) %>%
  summarise(count_brands = sum(count_brands), .groups = "drop") %>%
  ggplot(aes(x=date, y=count_brands)) +
  geom_vline(data=event_markers,
             aes(xintercept=as.Date(date)), colour="black", lty=2, size=0.5, alpha=0.8, inherit.aes=FALSE) +
  geom_line(aes(colour=party)) +
  facet_wrap(~ type, ncol=3) +
  # scale_y_continuous(label = scales::percent_format(1)) +
  scale_colour_manual(values = c("blue","red"), labels = c("Democrats", "Republicans"), name="Speech more associated with:") +
  # geom_text(data=event_markers, aes(x = as.Date(date), label=lbl),
  #           y = Inf, hjust = 1.15, vjust=-0.5, size=1.5, angle=90, inherit.aes=FALSE, fontface="plain") +
  xlab("") + ylab("Number of brands") +
  theme_custom +
  theme(legend.position="bottom",
        axis.text.x=element_text(size=8))

### Identify corresponding phrases for peaks ----
## For every party x phrase type: the week with the highest `pct_pty_posts`,
## joined to the phrases recorded for that week in `d.partisan.t`, keeping the
## phrase with the highest `count`. The join is a natural join on whatever
## columns the two tables share.
d.partisan.type.wk.peaks <- d.partisan.type.wk %>%
  #### find peak weeks
  group_by(party, type) %>%
  arrange(desc(pct_pty_posts)) %>%
  filter(row_number() <= 1) %>% ## <raise the bound to keep more peaks>
  #### find original phrases used in that week
  inner_join(
    d.partisan.t %>%
      mutate(type = stringr::str_to_title(type)) %>%
      select(timestamp_wk = timestamp_week, feature, count, party, type)
  ) %>%
  group_by(year, timestamp_wk, date, party, type) %>%
  arrange(desc(count)) %>%
  filter(row_number() <= 1) ## <raise the bound to keep more phrases>

## Annotate the peak weeks on the two by-type rows. Text peaks get a repelled
## label; a peak whose phrase contains the US-flag emoji is drawn as an image
## instead (second row only).
p.partisan.type.wk.1 <- p.partisan.type.wk.1 +
  geom_text_repel(
    data = d.partisan.type.wk.peaks %>%
      filter(tolower(type) %in% c("groups","issues"),
             !grepl("🇺🇸", feature)) %>%
      mutate(lbl = destem_text(feature)),
    mapping = aes(x = date, y = pct_pty_posts, label = lbl, colour = party),
    min.segment.length = unit(0, 'lines'),
    nudge_x = -1000,
    nudge_y = .05
  )

p.partisan.type.wk.2 <- p.partisan.type.wk.2 +
  geom_text_repel(
    data = d.partisan.type.wk.peaks %>%
      filter(tolower(type) %in% c("individuals", "observances", "expressions"),
             !grepl("🇺🇸", feature)) %>%
      mutate(lbl = destem_text(feature)),
    mapping = aes(x = date, y = pct_pty_posts, label = lbl, colour = party),
    min.segment.length = unit(0, 'lines'),
    nudge_x = -1000,
    nudge_y = .01
  ) +
  geom_image(
    data = d.partisan.type.wk.peaks %>%
      filter(tolower(type) %in% c("individuals", "observances", "expressions"),
             grepl("🇺🇸", feature)) %>%
      mutate(img = EMOJI_FLAG_THIN_FPATH),
    mapping = aes(x = date, y = pct_pty_posts, image = img),
    nudge_y = .005,
    size = .05
  )


## Final over-time figure: the overall weekly series on top, the two by-type
## rows underneath (relative heights 1.5 : 2).
p.partisan.type.wk <- cowplot::plot_grid(
  plotlist = list(p.partisan.type.wk.1, p.partisan.type.wk.2),
  nrow = 2
)
p.wk.combined <- cowplot::plot_grid(
  plotlist = list(p.wk, p.partisan.type.wk),
  nrow = 2,
  rel_heights = c(1.5, 2)
)

ggsave_v(
  p.wk.combined,
  file   = "figures/overtime/brands_partisan_overtime.pdf",
  width  = 8,
  height = 8
)

#### Brand-count version: only the two by-type rows are combined (no overall
#### panel), for simplicity's sake.
p.partisan.type.brands.wk <- cowplot::plot_grid(
  plotlist = list(p.partisan.type.brands.wk.1, p.partisan.type.brands.wk.2),
  nrow = 2
)

ggsave_v(
  p.partisan.type.brands.wk,
  file   = "figures/overtime/brands_partisan_overtime2.pdf",
  width  = 8,
  height = 5
)

# 5.) VISUALIZE BRANDS --------------------------------------------------------

## Brands by sector -----------------------------------------------------------

## For every ideal-point variant in `idealpt.vars`: plot, per brand category,
## the two highest- and two lowest-scoring brands with their 95% CI, with
## categories ordered by their mean ideal point, and save one PDF per variant.
##
## Fixes relative to the previous version:
##  * Optional columns (n, CI bounds) were selected with
##    `ifelse(<scalar test>, <column>, <fallback>)`. ifelse() returns a value
##    shaped like its test, so only the FIRST element of the column was kept
##    and then recycled to every brand. A plain `if`/`else` now returns the
##    whole column.
##  * `ymin`/`ymax` were fed the upper/lower bounds the wrong way round.
##  * The symmetric axis range ignores missing CI bounds.
for (var in idealpt.vars) {
  print(var)

  pt.var.name     <- paste0("ideal.", var)
  ## the ".n" column is shared between a variable and its ".bin" version
  n.var.name      <- paste0("ideal.", gsub("\\.bin", "", var), ".n")
  ci.upr.var.name <- paste0("ideal.", var, ".ci.95.upr")
  ci.lwr.var.name <- paste0("ideal.", var, ".ci.95.lwr")

  has.n      <- n.var.name %in% colnames(d.brands)
  has.ci.upr <- ci.upr.var.name %in% colnames(d.brands)
  has.ci.lwr <- ci.lwr.var.name %in% colnames(d.brands)

  ##ideal points
  d.indus.ideal <- d.brands %>%
    mutate(yougov_brand_category = gsub(" brand","",yougov_brand_category)) %>%
    mutate(yougov_brand_category = str_to_title(yougov_brand_category)) %>%
    ## fall back to a constant n / a zero-width interval when a column is absent
    mutate(ideal = .data[[pt.var.name]],
           ideal.n = if (has.n) .data[[n.var.name]] else MIN_PARTISAN_BIGRAMS + 1,
           ideal.ci.upr = if (has.ci.upr) .data[[ci.upr.var.name]] else ideal,
           ideal.ci.lwr = if (has.ci.lwr) .data[[ci.lwr.var.name]] else ideal) %>%
    ###must have enough data to be scaled with
    # filter(ideal.n > MIN_BIGRAM_COUNT, !is.na(yougov_brand_category), !is.na(yougov_name)) %>%
    filter(!is.na(ideal), !is.na(ideal.ci.upr)) %>%
    filter(!grepl("tv|media", yougov_brand_category, ignore.case = TRUE)) %>%
    ###get sectoral average
    group_by(yougov_brand_category) %>%
    mutate(ideal.ind = mean(ideal)) %>%
    ungroup() %>%
    arrange(-ideal.ind) %>%
    ## as_factor() keeps order of appearance, i.e. categories sorted by mean
    mutate(yougov_brand_category = as_factor(yougov_brand_category)) %>%
    ###pick a few top most lib/cons brands to show
    group_by(yougov_brand_category) %>%
    arrange(-ideal) %>%
    filter(row_number() %in% 1:2 | row_number() %in% ((n()-1):n())) %>%
    ungroup() %>%
    ###prep for plot
    arrange(yougov_brand_category, ideal) %>%
    mutate(yougov_name = as_factor(yougov_name)) %>%
    mutate(col = ifelse(ideal >= 0, "red", "blue")) %>%
    mutate(y = ideal, ymin = ideal.ci.lwr, ymax = ideal.ci.upr)

  ## symmetric axis: +/- the largest absolute CI bound
  d.indus.ideal.range <- c(-1, 1) *
    max(abs(c(d.indus.ideal$ymax, d.indus.ideal$ymin)), na.rm = TRUE)
  p.indus.ideal <- d.indus.ideal %>%
    ggplot(aes(x=yougov_name, y=y, ymin=ymin, ymax=ymax, colour=col)) +
    geom_hline(yintercept=0, lty=3, alpha=0.5) +
    geom_pointrange(alpha=0.8) +
    scale_colour_identity() +
    scale_y_continuous(limits = d.indus.ideal.range) +
    coord_flip() +
    xlab(expression(NULL %<-% "More Democrat-sounding sectors                                                                              More Republican-sounding sectors" %->% NULL)) +
    facet_grid(yougov_brand_category ~ ., scales = "free") +
    ylab(expression(NULL %<-% "Sounds more Dem.         Sounds more Rep." %->% NULL)) +
    theme_bw() +
    theme_custom_vertpanel
  ggsave_v(p.indus.ideal, file=sprintf("figures/idealpts/%s/indus.pdf", var), height=12.5, width=10)
}

## Distribution by sector -----------------------------------------------------

## Per-category densities of `ideal.main`, trimmed to the 1st-99th percentile
## of all brands, with categories ordered by their mean and a vertical line at
## each category mean.
##
## The output path is now a plain string. It was previously wrapped in
## sprintf() with a stray `var` argument: the format has no placeholder, so
## the argument was unused (with a warning) and depended on a loop variable
## left over from the preceding `for` loop. The resulting path is unchanged.
d.ideal.ind.dists <- d.brands %>%
  ###remove any media/TV brands if any remaining
  filter(!grepl("(comm|media|network)", yougov_brand_category)) %>%
  mutate(yougov_brand_category = str_to_title(yougov_brand_category)) %>%
  ### trim the extreme 1% tails, using quantiles of the full brand table
  filter(between(ideal.main,
                 quantile(d.brands$ideal.main, 0.01, na.rm = TRUE),
                 quantile(d.brands$ideal.main, 0.99, na.rm = TRUE))) %>%
  group_by(yougov_brand_category) %>%
  mutate(ideal.main.cat = mean(ideal.main, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(-ideal.main.cat) %>%
  ## as_factor() keeps order of appearance, i.e. categories sorted by mean
  mutate(yougov_brand_category = as_factor(yougov_brand_category))
p.ideal.ind.dists <- d.ideal.ind.dists %>%
  ggplot(aes(x=ideal.main, fill=ideal.main.cat, color=ideal.main.cat)) +
  geom_density(alpha=0.8) +
  facet_grid(yougov_brand_category ~ ., scales = "free") +
  ylab(expression(NULL %<-% "More Democrat-sounding sectors                                                                              More Republican-sounding sectors" %->% NULL)) +
  xlab(expression(NULL %<-% "Sounds more Dem.                            Sounds more Rep." %->% NULL)) +
  ## one thick line per facet at the category mean
  geom_vline(data = d.ideal.ind.dists %>%
               distinct(yougov_brand_category, ideal.main.cat),
             aes(xintercept=ideal.main.cat, color=ideal.main.cat), lty=1, size=2) +
  geom_vline(xintercept=0, color="black", lty=2, alpha=0.5) +
  scale_fill_gradient2(low = muted("blue"), mid = "white", high = muted("red")) +
  scale_color_gradient2(low = muted("blue"), mid = "white", high = muted("red")) +
  scale_x_continuous(limits = c(-100, 100)) +
  theme_custom_vertpanel +
  theme(axis.text.y = element_blank(),
        legend.position = "none")
ggsave_v(p.ideal.ind.dists, file = "figures/idealpts/main/indus_dists.pdf", height=12, width=10)

# 6.) DESCRIBE ALIGNMENT ------------------------------------------------------

#for (var in c("main","tw","ig","mdl","stances")) {
for (var in c("main")) {  
  t0 <- Sys.time()
  # for (var in c("tw","ig","stances")) {

  print(var)

  ## Names of the d.brands columns holding this variant's point estimate,
  ## count, and standard error. A ".bin" variant looks up its count under the
  ## variant name with ".bin" removed.
  (pt.var.name <- paste0("ideal.", var))
  (n.var.name  <- paste0("ideal.", gsub("\\.bin", "", var), ".n"))
  (se.var.name <- paste0("ideal.", var, ".se"))

  ## Expose the selected variant under generic column names used by the
  ## analyses below; when the variant has no count column of its own, fall
  ## back to the main variant's count.
  d.brands$ideal <- d.brands[[pt.var.name]]
  d.brands$ideal.n <- if (n.var.name %in% colnames(d.brands)) {
    d.brands[[n.var.name]]
  } else {
    d.brands$ideal.main.n
  }
  d.brands$ideal.se <- d.brands[[se.var.name]]
  
  ## 6.1. Stakeholders' partisan preferences -------------------------------------
  
  ### All Employees: Donations ----
  ## Republican share of employee donations: FEC value first, OpenSecrets
  ## individual-dollar share where the FEC value is missing.
  r.ideal.empl.don <- d.brands %>%
    mutate(R_don_share = coalesce(FEC.R_don_share, opsec.R_share.indiv_dollars)) %>%
    measure_alignment(
      x.col = "R_don_share",
      y.col = "ideal",
      lbl.col = "yougov_name",
      size.col = "ideal.n",
      x.lab = "% of Rep. Employee Donations",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      text.size = 2,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5,
      max.overlaps = 5,
      point.alpha = 0.3
    )

  ## Main scatter (with a facet strip as title) and the missingness plot
  ggsave_v(
    r.ideal.empl.don$plot + facet_wrap(~ "All Employees"),
    filename = sprintf("figures/align/%s/brands_ideal_empl_don.pdf", var),
    width = 6, height = 4
  )
  ggsave_v(
    r.ideal.empl.don$plot.miss,
    filename = sprintf("figures/align/%s/brands_ideal_empl_don_missing.pdf", var),
    width = 6, height = 6
  )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.empl.don$plot
    r.ideal.empl.don$plot.miss
  }
  
  ### Employees by Type: Donations ----
  r.ideal.empl.don.bytype <- d.brands %>%
    filter(ideal.n > MIN_PARTISAN_BIGRAMS) %>%
    ## One "x."-prefixed column per employee type. Where a second source is
    ## coalesced in, the FEC share takes precedence and the stkz21.* share
    ## fills the gaps.
    ## NOTE(review): Public Relations falls back to stkz21.R_share_govt and
    ## Marketing to stkz21.R_share_prmkt -- confirm these mappings are intended.
    mutate(
      `x.Board\nMembers`    = FEC.R_don_share.Board_Member,
      `x.Managers`          = FEC.R_don_share.Managers,
      `x.Legal`             = FEC.R_don_share.Legal,
      `x.Human\nResources`  = FEC.R_don_share.Human_Resources,
      `x.Executives`        = coalesce(FEC.R_don_share.Top_Exec, stkz21.R_share_exec),
      `x.Public\nRelations` = coalesce(FEC.R_don_share.Public_Relations, stkz21.R_share_govt),
      `x.Marketing`         = coalesce(FEC.R_don_share.Marketing, stkz21.R_share_prmkt),
      `x.Rank and File`     = coalesce(FEC.R_don_share.Rank_and_File, stkz21.R_share_rnf)
    ) %>%
    select(yougov_name, ideal, ideal.n, starts_with("x.")) %>%
    ## long format: one row per brand x employee type
    gather(key = "empl_type", value = "R_don_share", -yougov_name, -ideal, -ideal.n) %>%
    ## strip the "x." prefix, then fix the panel order
    mutate(
      empl_type = gsub("x\\.", "", empl_type),
      empl_type = factor(
        empl_type,
        levels = c("Executives", "Board\nMembers", "Legal", "Managers",
                   "Public\nRelations", "Human\nResources", "Marketing", "Rank and File")
      )
    ) %>%
    alignment_gridplot(
      x.col = "R_don_share",
      y.col = "ideal",
      grid.col = "empl_type",
      grid.ncols = 4,
      lbl.col = "yougov_name",
      size.col = "ideal.n",
      x.lab = "Donors (R)",
      y.lab = "Brand Signal (R)",
      text.size = 2,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5,
      annot.quads = TRUE,
      max.overlaps = 5,
      point.alpha = 0.3
    )
  ggsave_v(
    r.ideal.empl.don.bytype$plot,
    filename = sprintf("figures/align/%s/brands_ideal_empl_don_bytype.pdf", var),
    width = 8, height = 5
  )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.empl.don.bytype$plot
  }
  
  ### Consumers: Twitter Followers ----
  ## Brand signal vs. the follower slant stored in twitter.foll_ideo_slant
  r.ideal.cons.twfoll <- d.brands %>%
    measure_alignment(
      x.col = "twitter.foll_ideo_slant",
      y.col = "ideal",
      lbl.col = "yougov_name",
      size.col = "ideal.n",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.lab = "Rep. Direction of Scaled Twitter Followers",
      y.lab = "Rep. Direction of Brand Signal",
      text.size = 2,
      x.lim = c(-1, 1),
      y.mid = 0,
      x.mid = 0,
      max.overlaps = 5,
      point.alpha = 0.3
    )
  ggsave_v(
    r.ideal.cons.twfoll$plot +
      facet_wrap(~ "Twitter Followers") +
      labs(subtitle = "Scale brand followers using coded ideological accounts, weight followers evenly"),
    filename = sprintf("figures/align/%s/brands_ideal_twfoll.pdf", var),
    width = 6, height = 4.2
  )
  ggsave_v(
    r.ideal.cons.twfoll$plot.miss,
    filename = sprintf("figures/align/%s/brands_ideal_twfoll_missing.pdf", var),
    width = 6, height = 6
  )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.cons.twfoll$plot
    r.ideal.cons.twfoll$plot.miss
  }
  
  ## Alternative follower measure: twitter.foll_MC_slant.
  r.ideal.cons.twfoll2 <- d.brands %>%
    ## Orient the MC-based slant like the ideology-based slant: if the two are
    ## negatively correlated across brands, flip the sign of the MC-based one.
    mutate(twitter.foll_MC_slant = case_when(
      sign(cor(twitter.foll_MC_slant, twitter.foll_ideo_slant, use = "pairwise.complete.obs")) == -1 ~ -twitter.foll_MC_slant,
      TRUE ~ twitter.foll_MC_slant
    )) %>%
    ## The condition is passed as `align.condition =`, as in every sibling
    ## call. It was previously positional and so would have been bound to
    ## whichever formal of measure_alignment() happened to be next unmatched.
    measure_alignment(x.col = "twitter.foll_MC_slant", y.col = "ideal", lbl.col = "yougov_name", size.col = "ideal.n",
                      hist.missing = TRUE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.lab = "Rep. Direction of Scaled Twitter Followers", y.lab = "Rep. Direction of Brand Signal",
                      text.size = 2, x.lim = c(-1, 1), y.mid = 0, x.mid = 0, max.overlaps = 5, point.alpha = 0.3)
  ggsave_v(r.ideal.cons.twfoll2$plot +
             facet_wrap( ~ "Twitter Followers") +
             labs(subtitle = "Scale brand followers using MCs' accounts, weight followers evenly"),
           filename = sprintf("figures/align/%s/brands_ideal_twfoll2.pdf", var),
           width = 6, height = 4.2)
  ggsave_v(r.ideal.cons.twfoll2$plot.miss,
           filename = sprintf("figures/align/%s/brands_ideal_twfoll2_missing.pdf", var),
           width = 6, height = 6)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.cons.twfoll2$plot
    r.ideal.cons.twfoll2$plot.miss
  }
  
  ## Alternative follower measure built from the sl.* columns.
  r.ideal.cons.twfoll3 <- d.brands %>%
    ## Two-party Republican share (0-100) in each of the two snapshots,
    ## then the simple average of the two.
    mutate(sl.Rep_Share.2017_02 = (sl.Rep_Pct.2017_02/(sl.Rep_Pct.2017_02+sl.Dem_Pct.2017_02))*100,
           sl.Rep_Share.2022_10 = (sl.Rep_Pct.2022_10/(sl.Rep_Pct.2022_10+sl.Dem_Pct.2022_10))*100,
           twitter.foll_slant.sl = (sl.Rep_Share.2017_02 + sl.Rep_Share.2022_10)/2) %>%
    ## The condition is passed as `align.condition =`, as in every sibling
    ## call. It was previously positional and so would have been bound to
    ## whichever formal of measure_alignment() happened to be next unmatched.
    measure_alignment(x.col = "twitter.foll_slant.sl", y.col = "ideal", lbl.col = "yougov_name", size.col = "ideal.n",
                      hist.missing = TRUE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.lab = "% of Rep. Twitter Followers", y.lab = "Rep. Direction of Brand Signal",
                      text.size = 2, x.lim = c(0, 100), y.mid = 0, x.mid = 50, max.overlaps = 5, point.alpha = 0.3)
  ggsave_v(r.ideal.cons.twfoll3$plot +
             facet_wrap( ~ "Twitter Followers") +
             labs(subtitle = "Using data from social-listening.org"),
           filename = sprintf("figures/align/%s/brands_ideal_twfoll3.pdf", var),
           width = 6, height = 4.2)
  ggsave_v(r.ideal.cons.twfoll3$plot.miss,
           filename = sprintf("figures/align/%s/brands_ideal_twfoll3_missing.pdf", var),
           width = 6, height = 6)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.cons.twfoll3$plot
    r.ideal.cons.twfoll3$plot.miss
  }
  
  ### Employees: HQ ----
  d.hq <- d.brands %>%
    #### aggregate HQ geography measures
    ## turn every hq_Pres.DEM* share into its complement (1 - share);
    ## the columns keep their DEM names
    mutate_at(.vars = vars(starts_with("hq_Pres.DEM")), .funs = ~1-.x) %>%
    rowwise() %>%
    ## per-brand mean over the 08/12/16 columns at state and ZIP level;
    ## the ZIP-level value is preferred, state-level fills the gaps
    mutate(
      hq_pres.REP.stfips = mean(c(hq_Pres.DEM.08.stfips, hq_Pres.DEM.12.stfips, hq_Pres.DEM.16.stfips), na.rm = TRUE),
      hq_pres.REP.zip = mean(c(hq_Pres.DEM.08.zipcode, hq_Pres.DEM.12.zipcode, hq_Pres.DEM.16.zipcode), na.rm = TRUE),
      hq_pres.REP = coalesce(hq_pres.REP.zip, hq_pres.REP.stfips)
    ) %>%
    filter(!is.na(hq_pres.REP), !is.na(ideal)) %>%
    #### sanitize data
    ## drop place-type suffixes from the location name, patch two specific
    ## locations, and force the state to "New York" for New York HQs
    mutate(
      hq = gsub("( city| CDP| municipality| borough| town| government| village| county| County| City| corporation)", "", hq_usloc, ignore.case = TRUE),
      hq = gsub("Zcta.*02492.*", "Acton", hq),
      hq = gsub("Oklahoma*", "Oklahoma City", hq),
      hq_usstate = ifelse(grepl("New York", hq), "New York", hq_usstate)
    ) %>%
    rowwise() %>%
    ## convert full state names to two-letter abbreviations, then build the
    ## point label "<brand> (<hq>, <state>)"
    mutate(
      hq_usstate = ifelse(!all(nchar(hq_usstate) == 2, na.rm = TRUE),
                          state.abb[match(hq_usstate, state.name)],
                          hq_usstate),
      hq_lbl = paste0(yougov_name, " (", hq, ", ", hq_usstate, ")")
    )
  
  ## Brand signal vs. the presidential vote measure for the HQ location
  ## NOTE(review): the axis label says 2014-2022 while d.hq averages the
  ## 08/12/16 columns -- confirm which period the label should state.
  r.ideal.hq <- d.hq %>%
    measure_alignment(
      x.col = "hq_pres.REP",
      y.col = "ideal",
      lbl.col = "hq_lbl",
      size.col = "ideal.n",
      x.lab = "% Pres. Rep. Voteshare in HQ Location (2014-2022)",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      text.size = 2,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5,
      max.overlaps = 5,
      point.alpha = 0.3
    )

  ## Saved version highlights brands whose HQ location matches New York or
  ## San Francisco, with a single label anchored at the one with the highest
  ## hq_pres.REP.
  ggsave_v(
    r.ideal.hq$plot +
      geom_point(
        data = d.hq %>%
          filter(grepl("(New York|San Francisco)", hq_usloc)),
        aes(x = hq_pres.REP, y = ideal),
        color = "black", fill = "#619CFF", alpha = 0.8, size = 4, shape = 23
      ) +
      geom_text_repel(
        data = d.hq %>%
          filter(grepl("(New York|San Francisco)", hq_usloc)) %>%
          arrange(desc(hq_pres.REP)) %>%
          head(1),
        aes(x = hq_pres.REP, y = ideal, label = "NYC/Bay Area HQ Brands"),
        color = "#619CFF", size = 3, nudge_x = 0.3, nudge_y = -40, fontface = 'bold'
      ) +
      facet_wrap(~ "Firm Headquarters"),
    filename = sprintf("figures/align/%s/brands_ideal_hq.pdf", var),
    width = 6, height = 4
  )
  ggsave_v(
    r.ideal.hq$plot.miss,
    filename = sprintf("figures/align/%s/brands_ideal_hq_missing.pdf", var),
    width = 6, height = 6
  )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.hq$plot
    r.ideal.hq$plot.miss
  }
  
  #### [alternative] county instead of ZIP code ----
  ## Same HQ analysis, using the hq_pres.REP.county column as the x variable
  r.ideal.hq.cty <- d.hq %>%
    measure_alignment(
      x.col = "hq_pres.REP.county",
      y.col = "ideal",
      lbl.col = "hq_lbl",
      size.col = "ideal.n",
      x.lab = "% Pres. Rep. Voteshare\nin HQ County (2014-2022)",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      r.caption.size = 12,
      text.size = 2,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5,
      max.overlaps = 5,
      point.alpha = 0.3
    )
  ggsave_v(
    r.ideal.hq.cty$plot +
      theme(axis.title = element_text(size = 12)),
    filename = sprintf("figures/align/%s/brands_ideal_hq_cty.pdf", var),
    width = 6, height = 6
  )
  
  ### Consumers: Retail Locations ----
  ## Brand signal vs. the sg.pres.REP column (SafeGraph business locations)
  r.ideal.loc.sg <- d.brands %>%
    measure_alignment(y.col = "ideal", x.col = "sg.pres.REP", size.col = "ideal.n", lbl.col = "yougov_name",
                      hist.missing = TRUE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.lab = "% Pres. Rep. Vote (2014-2022) in Business Locations (SafeGraph)", y.lab = "Rep. Direction of Brand Signal",
                      text.size = 2, x.lim = c(0.3, 0.7), y.mid = 0, x.mid = 0.5, max.overlaps = 5, point.alpha = 0.3)
  ggsave_v(r.ideal.loc.sg$plot + facet_wrap( ~ "Business Locations"),
           filename = sprintf("figures/align/%s/brands_ideal_locs_sg.pdf", var),
           width = 6, height = 4)
  ## The "_missing" file must hold the missingness plot; it previously saved
  ## the main scatter ($plot) a second time.
  ggsave_v(r.ideal.loc.sg$plot.miss,
           filename = sprintf("figures/align/%s/brands_ideal_locs_sg_missing.pdf", var),
           width = 6, height = 6)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.loc.sg$plot
    r.ideal.loc.sg$plot.miss
  }
  
  ## Brand signal vs. the zi.pres.REP column (Zippia business locations)
  r.ideal.loc.zi <- d.brands %>%
    measure_alignment(y.col = "ideal", x.col = "zi.pres.REP", size.col = "ideal.n", lbl.col = "yougov_name",
                      hist.missing = TRUE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.lab = "% Pres. Rep. Vote (2014-2022) in Business Locations (Zippia)", y.lab = "Rep. Direction of Brand Signal",
                      text.size = 2, x.lim = c(0.3, 0.7), y.lim = c(-50, 50), y.mid = 0, x.mid = 0.5, max.overlaps = 5, point.alpha = 0.3)
  ggsave_v(r.ideal.loc.zi$plot + facet_wrap( ~ "Business Locations"),
           filename = sprintf("figures/align/%s/brands_ideal_locs_zi.pdf", var),
           width = 6, height = 4)
  ggsave_v(r.ideal.loc.zi$plot.miss,
           filename = sprintf("figures/align/%s/brands_ideal_locs_zi_missing.pdf", var),
           width = 6, height = 6)
  ## Never run when sourced; kept for interactive previewing. Previews the
  ## Zippia result (this previously pointed at the SafeGraph object).
  if (FALSE) {
    r.ideal.loc.zi$plot
    r.ideal.loc.zi$plot.miss
  }
  
  #### [alternative] county instead of ZIP code ----
  ## County-level counterpart of the SafeGraph locations analysis
  r.ideal.loc.sg.cty <- d.brands %>%
    measure_alignment(
      y.col = "ideal",
      x.col = "sg.pres.REP.county",
      size.col = "ideal.n",
      lbl.col = "yougov_name",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.lab = "% Pres. Rep. Vote (2014-2022) in County of Business Location (SafeGraph)",
      y.lab = "Rep. Direction of Brand Signal",
      r.caption.size = 12,
      text.size = 2,
      x.lim = c(0.3, 0.7),
      y.mid = 0,
      x.mid = 0.5,
      max.overlaps = 5,
      point.alpha = 0.3
    )
  ggsave_v(
    r.ideal.loc.sg.cty$plot,
    filename = sprintf("figures/align/%s/brands_ideal_locs_sg_cty.pdf", var),
    width = 6, height = 6
  )

  ## County-level counterpart of the Zippia locations analysis
  r.ideal.loc.zi.cty <- d.brands %>%
    measure_alignment(
      y.col = "ideal",
      x.col = "zi.pres.REP.county",
      size.col = "ideal.n",
      lbl.col = "yougov_name",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.lab = "% Pres. Rep. Vote (2014-2022) in County of Business Location (Zippia)",
      y.lab = "Rep. Direction of Brand Signal",
      r.caption.size = 12,
      text.size = 2,
      x.lim = c(0.3, 0.7),
      y.lim = c(-50, 50),
      y.mid = 0,
      x.mid = 0.5,
      max.overlaps = 5,
      point.alpha = 0.3
    )
  ggsave_v(
    r.ideal.loc.zi.cty$plot,
    filename = sprintf("figures/align/%s/brands_ideal_locs_zi_cty.pdf", var),
    width = 6, height = 6
  )
  
  ## Combined county-level locations measure: SafeGraph value first, Zippia
  ## value where SafeGraph is missing.
  r.ideal.loc.cty <- d.brands %>%
    mutate(pres.REP.county = coalesce(sg.pres.REP.county, zi.pres.REP.county)) %>%
    ## x.col points at the coalesced column built above. It previously used
    ## "zi.pres.REP.county", which left the combined measure unused and made
    ## this figure a duplicate of the Zippia-only one.
    measure_alignment(y.col = "ideal", x.col = "pres.REP.county", size.col = "ideal.n", lbl.col = "yougov_name",
                      hist.missing = TRUE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.lab = "% Pres. Rep. Vote (2014-2022) in\nCounty of Business Location", y.lab = "Rep. Direction of Brand Signal",
                      r.caption.size = 12,
                      text.size = 2, x.lim = c(0.3, 0.7), y.lim = c(-50, 50), y.mid = 0, x.mid = 0.5, max.overlaps = 5, point.alpha = 0.3)
  ggsave_v(r.ideal.loc.cty$plot +
             theme(axis.title = element_text(size = 12)),
           filename = sprintf("figures/align/%s/brands_ideal_locs_cty.pdf", var),
           width = 6, height = 6)
  
  ### Politicians: House Reps ----

  ## Brand signal vs. cong_house_dw_mean, excluding Crown Royal
  r.ideal.house <- d.brands %>%
    filter(!grepl("Crown Royal", yougov_name)) %>%
    measure_alignment(y.col = "ideal", x.col = "cong_house_dw_mean", size.col = "ideal.n", lbl.col = "yougov_name",
                      x.lab = "Avg. HQ House Rep Nominate", y.lab = "Rep. Direction of Brand Signal",
                      text.size = 2, max.overlaps = 5, point.alpha = 0.2)

  ## Saved under its own file name. It previously wrote to
  ## brands_ideal_locs_zi.pdf and so overwrote the Zippia locations figure.
  ggsave_v(r.ideal.house$plot + facet_wrap( ~ "HQ House Rep. Ideology"),
           filename = sprintf("figures/align/%s/brands_ideal_house.pdf", var),
           width = 6, height = 4)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.house$plot
  }
  
  ### Politicians: Senate Reps ----

  ## Brand signal vs. cong_sen_dw_mean, excluding Crown Royal
  r.ideal.sen <- d.brands %>%
    filter(!grepl("Crown Royal", yougov_name)) %>%
    measure_alignment(y.col = "ideal", x.col = "cong_sen_dw_mean", size.col = "ideal.n", lbl.col = "yougov_name",
                      x.lab = "Avg. HQ Senate Rep Nominate", y.lab = "Rep. Direction of Brand Signal",
                      text.size = 2, max.overlaps = 5, point.alpha = 0.2)

  ## Saved under its own file name. It previously wrote to
  ## brands_ideal_locs_zi.pdf and so overwrote the Zippia locations figure.
  ggsave_v(r.ideal.sen$plot + facet_wrap( ~ "HQ Senate Rep. Ideology"),
           filename = sprintf("figures/align/%s/brands_ideal_sen.pdf", var),
           width = 6, height = 4)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.sen$plot
  }
  
  ### All Stakeholders ----

  ## Interactive check of the distribution of the stakeholder measure
  quantile(d.brands$stkhl.R, na.rm = TRUE)

  ## Brand signal vs. stkhl.R, excluding Crown Royal. The axis label reads
  ## "Stakeholders" to match the section, the facet title, and the variable;
  ## it previously said "Shareholders".
  r.ideal.stkhl.R <- d.brands %>%
    filter(!grepl("Crown Royal", yougov_name)) %>%
    measure_alignment(y.col = "ideal", x.col = "stkhl.R", size.col = "ideal.n", lbl.col = "yougov_name",
                      x.lab = "Prop. of Stakeholders that are Rep.", y.lab = "Rep. Direction of Brand Signal",
                      x.mid = 0.5, text.size = 2, max.overlaps = 5, point.alpha = 0.2)
  ggsave_v(r.ideal.stkhl.R$plot + facet_wrap( ~ "All Stakeholders"),
           filename = sprintf("figures/align/%s/brands_ideal_stkhl.pdf", var),
           width = 6, height = 4)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.stkhl.R$plot
  }
  
  ### COMBINED GRID ----
  ## Panel theme shared by the grid: black facet strip with white title text,
  ## no legend.
  theme_grid_titled <- theme_bw() + theme(strip.background = element_rect(fill="black"),
                                          legend.position = "none",
                                          # strip.text.x = element_blank(),
                                          # strip.text.y = element_blank(),
                                          axis.title = element_text(size=12),
                                          strip.text.x = element_text(size=14, colour="white"))
  ## Same theme with a slightly larger, bold strip title (used for the final
  ## "All Stakeholders" panel).
  theme_grid_titled_bold <- theme_bw() + theme(strip.background = element_rect(fill="black"),
                                               legend.position = "none",
                                               # strip.text.x = element_blank(),
                                               # strip.text.y = element_blank(),
                                               axis.title = element_text(size=12),
                                               strip.text.x = element_text(size=15, colour="white", face="bold"))

  ## One panel per stakeholder measure, each re-titled through a facet strip
  ## and given fresh axis scales/labels. The whole grid is wrapped in
  ## suppressMessages(), presumably to silence the messages ggplot2 emits
  ## when a scale is replaced.
  r.ideal.grid.stkhl <-
    suppressMessages({cowplot::plot_grid(
      r.ideal.empl.don$plot + facet_wrap(~ "All Employees + Board") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
        scale_y_continuous(name = "Brand signal (R)") +
        theme_grid_titled,

      r.ideal.empl.don.bytype$plot.list$Executives + facet_wrap(~ "Executives") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      r.ideal.empl.don.bytype$plot.list$Managers + facet_wrap(~ "Managers") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      r.ideal.empl.don.bytype$plot.list$`Board\nMembers` + facet_wrap(~ "Board Members") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      r.ideal.empl.don.bytype$plot.list$`Rank and File` + facet_wrap(~ "Rank and File Employees") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
        scale_y_continuous(name = "Brand signal (R)") +
        theme_grid_titled,

      r.ideal.cons.twfoll$plot + facet_wrap(~ "Twitter Followers") +
        scale_x_continuous(name = "Rep. direction of followers") +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      ## HQ panel: highlight brands whose HQ location matches New York or
      ## San Francisco and label the group once, anchored at the brand with
      ## the highest hq_pres.REP. The `segment.size` argument was dropped:
      ## it is a ggrepel parameter that geom_label() does not accept.
      r.ideal.hq$plot + facet_wrap(~ "Voters in HQ ZIP") +
        geom_point(data = d.hq %>%
                     filter(grepl("(New York|San Francisco)", hq_usloc)),
                   aes(x=hq_pres.REP, y=ideal),
                   color="#619CFF", fill="white", alpha = 1, size=4, shape=21) +
        geom_label(data = d.hq %>%
                     filter(grepl("(New York|San Francisco)", hq_usloc)) %>%
                     arrange(desc(hq_pres.REP)) %>%
                     head(1),
                   aes(x=hq_pres.REP, y=ideal, label="NYC/Bay Area HQ Brands"),
                   color="#619CFF", size=3, nudge_x=0.1, nudge_y=-40, fontface='bold') +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. Pres vote (2012-2016)", limits=c(0,1)) +
        coord_cartesian(xlim = c(0,1), clip = "off") +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      r.ideal.loc.zi$plot + facet_wrap(~ "Voters in Business/Retail ZIP") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. Pres vote (2012-2016)", limits=c(0,1)) +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      r.ideal.house$plot + facet_wrap( ~ "Ideology of HQ House Rep.") +
        scale_x_continuous(name = "DW-nominate (2012-2018)") +
        scale_y_continuous(name = "Brand signal (R)") +
        theme_grid_titled,

      r.ideal.sen$plot + facet_wrap(~ "Ideology of HQ Senator") +
        scale_x_continuous(name = "DW-nominate (2012-2018)") +
        scale_y_continuous(name = "") +
        theme_grid_titled,

      r.ideal.stkhl.R$plot + facet_wrap(~ "All Stakeholders") +
        scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. stakeholders", limits=c(0,1)) +
        scale_y_continuous(name = "") +
        theme_grid_titled_bold
    )})
  ggsave_v(r.ideal.grid.stkhl,
           filename = sprintf("figures/align/%s/brands_ideal_all_stkhl.pdf", var),
           width = 13, height = 8)
  
  ## 6.2. Stakeholders' demographics ---------------------------------------------
  
  ### Consumers: YouGov Audience survey ----
  
  #### scaled text ----
  ## Results container: tidy regression table plus the coefficient plot
  r.ideal.yougov_aud <- list()
  
  ## One bivariate regression per audience segment column: standardized brand
  ## signal on the standardized segment percentage, fit with lm_robust().
  r.ideal.yougov_aud$ols.robust.fits.tidy <- d.brands %>%
    ## keep the yougov_aud* columns, dropping "indus_cens" columns and any
    ## whose name contains "income" (case-sensitive match)
    select(ideal, starts_with("yougov_aud"), -matches("indus_cens"), -contains("income", ignore.case = F)) %>%
    ## long format: one row per brand x segment
    gather(key="sgmt", value="pct", -ideal) %>%
    ## strip any "%" sign and convert to numeric; unparseable values are dropped
    mutate(pct = as.numeric(gsub("\\%","",pct))) %>%
    filter(!is.na(pct)) %>%
    group_by(sgmt) %>%
    do(reg = tidy(lm_robust(scale(ideal) ~ scale(pct), data = .))) %>%
    unnest(reg) %>%
    rowwise() %>%
    ## segment names are dot-separated: the 2nd piece is used as the variable
    ## type, the 3rd as the variable value
    mutate(var_type = strsplit(sgmt, split="\\.")[[1]][2],
           var_val = strsplit(sgmt, split="\\.")[[1]][3]) %>%
    ## keep only the slope rows
    filter(!grepl("Intercept",term))
  
  ## Largest absolute endpoint of the +/- 2 SE intervals; used to make the
  ## x-axis symmetric around zero
  x.abs_max <- 
    with(r.ideal.yougov_aud$ols.robust.fits.tidy, 
         max(abs(c(estimate-2*std.error, estimate+2*std.error))))
  
  r.ideal.yougov_aud$plot <- r.ideal.yougov_aud$ols.robust.fits.tidy %>%
    ## black when p < 0.05, gray otherwise (used verbatim via scale_color_identity);
    ## tidy up value labels and facet titles
    mutate(sig = ifelse(p.value < 0.05, "black", "gray"),
           var_val = gsub("_"," ",var_val),
           var_val = gsub("\\(.*","",var_val),
           var_type = case_when(
             grepl("Educ", var_type, ignore.case=T) ~ "Educ. Status",
             grepl("Empl", var_type, ignore.case=T) ~ "Empl. Status",
             TRUE ~ var_type
           ),
           var_type = paste0("% ", stringr::str_to_title(var_type))) %>%
    ## sort by descending estimate before converting labels to a factor;
    ## the statement order here determines the y-axis ordering
    group_by(var_type) %>%
    arrange(-estimate) %>%
    mutate(var_val = as_factor(var_val)) %>%  
    ggplot(aes(y=var_val, color=sig)) +
    geom_vline(xintercept = 0, lty = 2) +
    ## thin range: +/- 2 SE; thick range: +/- 1.65 SE
    geom_pointrange(aes(xmin=estimate-2*std.error, x=estimate, xmax=estimate+2*std.error), size=0.5) +
    geom_pointrange(aes(xmin=estimate-1.65*std.error, x=estimate, xmax=estimate+1.65*std.error), size=1) +
    facet_grid(var_type ~ ., space = "free_y", scale = "free_y") +
    scale_color_identity() +
    ylab("") + xlab("Standardized Coefficient (DV: Brand Signal)") +
    xlim(c(-x.abs_max, x.abs_max)) +
    labs(caption = expression(NULL %<-% bold("More Democrat                                                               More Republican") %->% NULL)) +
    theme_custom_vertpanel +
    theme(plot.caption = element_text(hjust = 0.5))
  ggsave_v(r.ideal.yougov_aud$plot, 
           file = sprintf("figures/align/%s/brands_ideal_yougov_aud.pdf",var), 
           height=7, width=8)
  rm(x.abs_max)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.yougov_aud$plot
  }
  
  #### relevant keywords ----
  ## Interactive aid: list the available yougov_aud columns
  colnames(d.brands)[grepl("yougov_aud", colnames(d.brands))]

  ## Logged usage of Women keywords vs. the yougov_aud.gender.Female column
  r.kwd_Women.yougov_aud.Women <- d.brands %>%
    ## missing keyword values are set to zero before logging
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`kwd.Women` = log(`kwd.Women` + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "yougov_aud.gender.Female",
      y.col = "kwd.Women",
      lbl.col = "yougov_name",
      x.lab = "% of Female Consumers",
      y.lab = "log(Usage of Women Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      ## y midline at the logged value of a zero count
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.kwd_Women.yougov_aud.Women$plot
  }
  
  ### Employees: Zippia Estimates ----
  
  #### scaled text ----
  
  ## Results container: tidy regression table plus the coefficient plot
  r.ideal.zippia_empl <- list()
  
  ## One bivariate regression per employee segment column: standardized brand
  ## signal on the standardized segment percentage, fit with lm_robust().
  r.ideal.zippia_empl$ols.robust.fits.tidy <- d.brands %>%
    ## keep the zippia_empl* columns, dropping the "Most Common" ones
    select(ideal, starts_with("zippia_empl"), -matches("Most Common")) %>%
    ## long format: one row per brand x segment
    gather(key="sgmt", value="pct", -ideal) %>%
    ## strip any "%" sign and convert to numeric; unparseable values are dropped
    mutate(pct = as.numeric(gsub("\\%","",pct))) %>%
    filter(!is.na(pct)) %>%
    group_by(sgmt) %>%
    do(reg = tidy(lm_robust(scale(ideal) ~ scale(pct), data = .))) %>%
    unnest(reg) %>%
    rowwise() %>%
    ## segment names are dot-separated: the 2nd piece is used as the variable
    ## type, the 3rd as the variable value
    mutate(var_type = strsplit(sgmt, split="\\.")[[1]][2],
           var_val = strsplit(sgmt, split="\\.")[[1]][3]) %>%
    ## keep only the slope rows
    filter(!grepl("Intercept",term))
  
  ## Largest absolute endpoint of the +/- 2 SE intervals; used to make the
  ## x-axis symmetric around zero. Unlike the YouGov block, this value is
  ## not removed afterwards.
  x.abs_max <- 
    with(r.ideal.zippia_empl$ols.robust.fits.tidy, max(abs(c(estimate-2*std.error, estimate+2*std.error))))
  
  r.ideal.zippia_empl$plot <- r.ideal.zippia_empl$ols.robust.fits.tidy %>%
    ## black when p < 0.05, gray otherwise (used verbatim via scale_color_identity);
    ## tidy up value labels and facet titles
    mutate(sig = ifelse(p.value < 0.05, "black", "gray"),
           var_val = gsub("_"," ",var_val),
           var_type = case_when(
             grepl("Age", var_type, ignore.case=T) ~ "Age",
             grepl("Gender", var_type, ignore.case=T) ~ "Gender",
             grepl("Degree", var_type, ignore.case=T) ~ "Educ. Status",
             TRUE ~ var_type
           ),
           var_type = paste0("% ", stringr::str_to_title(var_type))) %>%
    ## sort by descending estimate before converting labels to a factor;
    ## the statement order here determines the y-axis ordering
    group_by(var_type) %>%
    arrange(-estimate) %>%
    mutate(var_val = as_factor(var_val)) %>%
    ggplot(aes(y=var_val, color=sig)) +
    geom_vline(xintercept = 0, lty = 2) +
    ## thin range: +/- 2 SE; thick range: +/- 1.65 SE
    geom_pointrange(aes(xmin=estimate-2*std.error, x=estimate, xmax=estimate+2*std.error), size=0.5) +
    geom_pointrange(aes(xmin=estimate-1.65*std.error, x=estimate, xmax=estimate+1.65*std.error), size=1) +
    facet_grid(var_type ~ ., space = "free_y", scale = "free_y") +
    scale_color_identity() +
    ylab("") + xlab("Standardized Coefficient (DV: Brand Signal)") +
    xlim(c(-x.abs_max, x.abs_max)) +
    labs(caption = expression(NULL %<-% bold("More Democrat                                                               More Republican") %->% NULL)) +
    theme_custom_vertpanel +
    theme(plot.caption = element_text(hjust = 0.5))
  ggsave_v(r.ideal.zippia_empl$plot, 
           file = sprintf("figures/align/%s/brands_ideal_empl_zi.pdf",var), 
           height=6, width=8)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.zippia_empl$plot
  }
  
  #### relevant keywords ----
  ## Interactive aid: list the available zippia columns
  colnames(d.brands)[grepl("zippia", colnames(d.brands))]

  ## Logged usage of racial minority group/issue keywords vs. the
  ## non-White employee percentage (100 minus the White percentage)
  r.kwd_Nonwhite.zi.Nonwhite <- d.brands %>%
    ## missing keyword values are set to zero before summing and logging
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(
      `zi.Nonwhite` = 100 - zippia_empl.Ethnicity.White,
      `kwd.Nonwhite` = log(
        `kwd.Black Community` +
          `kwd.Asian/AAPI Community` +
          `kwd.Hispanic/Latino Community` +
          `kwd.Racial Justice` +
          `kwd.Indigenous/Native Community` +
          LOG_OFFSET
      )
    ) %>%
    measure_alignment(
      x.col = "zi.Nonwhite",
      y.col = "kwd.Nonwhite",
      lbl.col = "yougov_name",
      x.lab = "% of Non-White Employees",
      y.lab = "log(Usage of Racial Minority Group/Issues Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )

  ## Logged usage of Women keywords vs. the female employee percentage
  r.kwd_Women.zi.Female <- d.brands %>%
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`kwd.Women` = log(`kwd.Women` + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "zippia_empl.Genders.Female",
      y.col = "kwd.Women",
      lbl.col = "yougov_name",
      x.lab = "% of Female Employees",
      y.lab = "log(Usage of Women Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.kwd_Nonwhite.zi.Nonwhite$plot
    r.kwd_Women.zi.Female$plot
  }

  ## 6.3. Firm activities --------------------------------------------------------
  
  ### 6.3.1. Employees: HRC ratings ----

  #### scaled text ----
  ## Brand signal vs. the hrc_rating column
  r.ideal.hrc <- d.brands %>%
    measure_alignment(
      x.col = "hrc_rating",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "HRC Average Score",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.hrc$plot
    r.ideal.hrc$plot.miss
  }

  #### relevant keywords ----
  ## Logged usage of LGBTQ keywords vs. the hrc_rating column.
  ## NOTE(review): unlike the other keyword analyses, missing kwd.* values
  ## are not zero-filled here and no y.mid is passed -- confirm intended.
  r.kwd_LGBTQ.hrc <- d.brands %>%
    mutate(kwd.LGBTQ = log(kwd.LGBTQ + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "hrc_rating",
      y.col = "kwd.LGBTQ",
      lbl.col = "yougov_name",
      x.lab = "HRC Score (2021)",
      y.lab = "log(Usage of LGBTQ Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.kwd_LGBTQ.hrc$plot
  }
  
  ### 6.3.2. Employees: Glassdoor ratings ----

  #### scaled text ----
  ## Brand signal vs. the Glassdoor average rating column for Black or
  ## African American employees
  r.ideal.gd.black <- d.brands %>%
    measure_alignment(
      x.col = "gd.Race / Ethnicity.Black or African American.avg_rating",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "Glassdoor African American Employee Ratings",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.gd.black$plot
    r.ideal.gd.black$plot.miss
  }
  
  ## Brand signal vs. the Glassdoor "Diversity and Inclusion" rating column.
  ## The axis label is corrected from "Diversity, Inclusiong & Inclusion" to
  ## spell out the DEI acronym it cites.
  r.ideal.gd.dei <- d.brands %>%
    measure_alignment(x.col = "glassdoor_Rating.Diversity and Inclusion", y.col = "ideal", lbl.col = "yougov_name",
                      x.lab = "Glassdoor Diversity, Equity & Inclusion (DEI) Ratings", y.lab = "Rep. Direction of Brand Signal",
                      hist.missing = TRUE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.mid = NA, annot.quads = FALSE)
  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.gd.dei$plot
    r.ideal.gd.dei$plot.miss
  }
  
  ## Brand signal vs. the Glassdoor average rating column for women
  r.ideal.gd.women <- d.brands %>%
    measure_alignment(
      x.col = "gd.Gender.Women.avg_rating",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "Glassdoor Women Employees Ratings",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.gd.women$plot
    r.ideal.gd.women$plot.miss
  }

  ## Brand signal vs. the Glassdoor average rating column for LGBTQ+ employees
  r.ideal.gd.lgbtq <- d.brands %>%
    measure_alignment(
      x.col = "gd.Sexual Orientation.LGBTQ+.avg_rating",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "Glassdoor LGBTQ Employee Ratings",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )

  ## Never run when sourced; kept for interactive previewing
  if (FALSE) {
    r.ideal.gd.lgbtq$plot
    r.ideal.gd.lgbtq$plot.miss
  }
  
  #### relevant keywords ----
  ## Interactive aid: list the available Glassdoor race/ethnicity columns
  colnames(d.brands)[grepl("gd.Race / Ethnicity", colnames(d.brands))]

  ## Row-wise mean of the six non-White race/ethnicity rating columns,
  ## ignoring missing values
  d.brands$`gd.Nonwhite.avg_rating` <- rowMeans(
    select(
      d.brands,
      `gd.Race / Ethnicity.Asian.avg_rating`,
      `gd.Race / Ethnicity.Black or African American.avg_rating`,
      `gd.Race / Ethnicity.Hispanic or Latinx.avg_rating`,
      `gd.Race / Ethnicity.Indigenous American or Alaska Native.avg_rating`,
      `gd.Race / Ethnicity.Middle Eastern.avg_rating`,
      `gd.Race / Ethnicity.Native Hawaiian or Other Pacific Islander.avg_rating`
    ),
    na.rm = TRUE
  )

  ## Logged usage of racial minority group/issue keywords vs. the averaged
  ## non-White rating
  r.kwd_Nonwhite.gd.nonwhite <- d.brands %>%
    ## missing keyword values are set to zero before summing and logging
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(
      `kwd.Nonwhite` = log(
        `kwd.Black Community` +
          `kwd.Asian/AAPI Community` +
          `kwd.Hispanic/Latino Community` +
          `kwd.Racial Justice` +
          `kwd.Indigenous/Native Community` +
          LOG_OFFSET
      )
    ) %>%
    measure_alignment(
      x.col = "gd.Nonwhite.avg_rating",
      y.col = "kwd.Nonwhite",
      lbl.col = "yougov_name",
      x.lab = "Glassdoor Non-White Employee Ratings (Average)",
      y.lab = "log(Usage of Racial Minority Group/Issues Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )

  ## Logged usage of LGBTQ keywords vs. the LGBTQ+ employee rating
  r.kwd_LGBTQ.gd.lgbtq <- d.brands %>%
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`kwd.LGBTQ` = log(`kwd.LGBTQ` + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "gd.Sexual Orientation.LGBTQ+.avg_rating",
      y.col = "kwd.LGBTQ",
      lbl.col = "yougov_name",
      x.lab = "Glassdoor LGBTQ Employee Ratings",
      y.lab = "log(Usage of LGBTQ Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )

  ## Logged usage of Women keywords vs. the women employee rating
  r.kwd_Women.gd.women <- d.brands %>%
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`kwd.Women` = log(`kwd.Women` + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "gd.Gender.Women.avg_rating",
      y.col = "kwd.Women",
      lbl.col = "yougov_name",
      x.lab = "Glassdoor Women Employee Ratings",
      y.lab = "log(Usage of Women Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )
  
  ## Minority group/issue keyword usage vs. Glassdoor's Diversity and Inclusion
  ## rating. Missing keyword columns are set to 0, then the seven keyword
  ## columns are summed and logged with LOG_OFFSET.
  r.kwd_Minorities.gd.dei <- d.brands %>% 
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`kwd.Minorities` = log(`kwd.Black Community`+`kwd.Asian/AAPI Community`+`kwd.Hispanic/Latino Community`+`kwd.Women`+`kwd.LGBTQ`+`kwd.Racial Justice`+`kwd.Indigenous/Native Community`+LOG_OFFSET)) %>% 
    measure_alignment(x.col = "glassdoor_Rating.Diversity and Inclusion", y.col = "kwd.Minorities", lbl.col = "yougov_name",
                      ## Axis label previously read "Diversity, Inclusiong & Inclusion".
                      x.lab = "Glassdoor Diversity, Equity & Inclusion (DEI) Ratings", y.lab = "log(Usage of Minority Group/Issues Keywords)",
                      hist.missing = FALSE,
                      x.mid = NA, y.mid = log(LOG_OFFSET), annot.quads = FALSE)
  
  ### 6.3.3. Legislative Lobbying ----
  ## `ideal` vs. share of Republican legislators lobbied (points sized by
  ## `legis.n`), with quadrant midpoints at x = 0.5 and y = 0.
  r.ideal.firm.legis <- d.brands %>%
    measure_alignment(
      x.col = "legis.R_frac",
      y.col = "ideal",
      lbl.col = "yougov_name",
      size.col = "legis.n",
      x.lab = "Share of Rep. Legislators Lobbied by Parent Firm/Affiliates",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      text.size = 4,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5
    )
  ## Save the main plot (with a titled facet strip) and the missingness plot.
  ggsave_v(
    r.ideal.firm.legis$plot + facet_wrap(~ "Legislative Lobbying"),
    filename = sprintf("figures/align/%s/brands_ideal_legis.pdf", var),
    width = 6,
    height = 4
  )
  ggsave_v(
    r.ideal.firm.legis$plot.miss,
    filename = sprintf("figures/align/%s/brands_ideal_legis_missing.pdf", var),
    width = 6,
    height = 4
  )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.firm.legis$plot
  }
  
  ### 6.3.4. PAC Contributions ----
  ## `ideal` vs. Republican share of PAC dollars given to groups.
  ## NOTE(review): points here are sized by logged *candidate* dollars
  ## (`opsec.cand.org_dollars`), the same size column as the candidates plot
  ## below -- confirm this is intended.
  r.ideal.firm.pac.groups <- d.brands %>%
    mutate(opsec.cand.org_dollars = log(opsec.cand.org_dollars + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "opsec.R_share.org_dollars",
      y.col = "ideal",
      lbl.col = "yougov_name",
      size.col = "opsec.cand.org_dollars",
      x.lab = "Share of $ Spent on Rep. Groups by Parent Firm PAC Affiliates",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      text.size = 4,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5
    )
  ggsave_v(
    r.ideal.firm.pac.groups$plot + facet_wrap(~ "PAC Contributions"),
    filename = sprintf("figures/align/%s/brands_ideal_pac_groups.pdf", var),
    width = 6,
    height = 4
  )
  ggsave_v(
    r.ideal.firm.pac.groups$plot.miss,
    filename = sprintf("figures/align/%s/brands_ideal_pac_groups_missing.pdf", var),
    width = 6,
    height = 4
  )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.firm.pac.groups$plot
  }

  ## `ideal` vs. Republican share of PAC dollars given to candidates.
  r.ideal.firm.pac.cands <- d.brands %>%
    mutate(opsec.cand.org_dollars = log(opsec.cand.org_dollars + LOG_OFFSET)) %>%
    measure_alignment(
      x.col = "opsec.R_cand_share.org_dollars",
      y.col = "ideal",
      lbl.col = "yougov_name",
      size.col = "opsec.cand.org_dollars",
      x.lab = "Share of $ Spent on Rep. Candidates by Parent Firm PAC Affiliates",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      text.size = 4,
      x.lim = c(0, 1),
      y.mid = 0,
      x.mid = 0.5
    )
  ggsave_v(
    r.ideal.firm.pac.cands$plot + facet_wrap(~ "PAC Contributions"),
    filename = sprintf("figures/align/%s/brands_ideal_pac_cands.pdf", var),
    width = 6,
    height = 4
  )
  ggsave_v(
    r.ideal.firm.pac.cands$plot.miss,
    filename = sprintf("figures/align/%s/brands_ideal_pac_cands_missing.pdf", var),
    width = 6,
    height = 4
  )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.firm.pac.cands$plot
  }
  
  #### combined partisan preferences grid ----

  ## Shared theme for the grid panels: theme_bw() with black facet strips,
  ## bold white strip titles, and no legend.
  theme_grid_titled_bold <- theme_bw() + theme(strip.background = element_rect(fill="black"),
                                               legend.position = "none",
                                               # strip.text.x = element_blank(),
                                               # strip.text.y = element_blank(),
                                               axis.title = element_text(size=12),
                                               strip.text.x = element_text(color="white", face="bold", size=15))
  ## One-row grid of the two PAC plots (groups, candidates). Each panel gets a
  ## titled facet strip, a percent-formatted x axis limited to [0, 1], and a
  ## shortened y-axis title. The legislative lobbying panel is commented out.
  r.ideal.grid.actv <- cowplot::plot_grid(
    # r.ideal.firm.legis$plot + facet_wrap(~"Legislative Lobbying") +
    #   scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. legislators", limits=c(0,1)) +
    #   scale_y_continuous(name = "Brand signal (R)") + 
    #   theme_grid_titled_bold,
    
    r.ideal.firm.pac.groups$plot + facet_wrap(~"PAC Spending (Groups)") +
      scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
      scale_y_continuous(name = "Brand signal (R)") + 
      theme_grid_titled_bold,
  
    r.ideal.firm.pac.cands$plot + facet_wrap(~"PAC Spending (Candidates)") +
      scale_x_continuous(labels = scales::percent_format(1), name = "% Rep. donations", limits=c(0,1)) +
      scale_y_continuous(name = "Brand signal (R)") + 
      theme_grid_titled_bold,
    nrow = 1
  )
  ## Write the combined grid under the per-`var` figure directory.
  ggsave_v(r.ideal.grid.actv, 
           filename = sprintf("figures/align/%s/brands_ideal_all_actv.pdf",var),
           width = 9, height = 4)
  
  ### 6.3.5. Climate Activities ----

  #### scaled text ----
  ## `ideal` vs. the brand's average CDP score.
  r.ideal.cdp <- d.brands %>%
    measure_alignment(
      x.col = "cdp_avg_score",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "CDP Score",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.cdp$plot
    r.ideal.cdp$plot.miss
  }

  ## Distribution of `ideal` grouped by the Climate Action 100+ disclosure
  ## indicator 7.1.a (March 2022); brands without an assessment are dropped.
  r.ideal.clm100_discl.7_1 <- d.brands %>%
    filter(!is.na(`clm100_discl.7.1 Metric assessment.a.Mar-22`)) %>%
    histogram_plot(
      x.lab = "Rep. Direction of Brand Signal",
      x.col = "ideal",
      group.lab = "Brand commits to conduct all lobbying\nin line with Paris Agreement?",
      group.col = "clm100_discl.7.1 Metric assessment.a.Mar-22"
    )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.clm100_discl.7_1$plot
    r.ideal.clm100_discl.7_1$plot.miss
  }

  ## `ideal` vs. the Climate Action 100+ organisation score (March 2022).
  r.ideal.clm100_policy.org <- d.brands %>%
    measure_alignment(
      x.col = "clm100_policy.Organisation Score.March 2022",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "Climate Action 100+ Organization Score (March 2022)",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = TRUE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.clm100_policy.org$plot
    r.ideal.clm100_policy.org$plot.miss
  }
  
  #### related keywords ----
  
  ## Logged climate/environment keyword usage vs. the brand's average CDP score.
  r.kwd_Climate.cdp <- d.brands %>% 
    mutate(`kwd.Climate/Environment` = log(`kwd.Climate/Environment`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "cdp_avg_score", y.col = "kwd.Climate/Environment", lbl.col = "yougov_name",
                      x.lab = "CDP Score", y.lab = "log(Usage of Climate/Environment Keywords)",
                      hist.missing = TRUE,
                      x.mid = NA, y.mid = log(LOG_OFFSET), annot.quads = FALSE)
  if (FALSE) {
    r.kwd_Climate.cdp$plot
  }
  
  ## Logged climate keyword usage vs. the Climate Action 100+ organisation score.
  ## The x column previously pointed at the Engagement Intensity score, which
  ## made this result a duplicate of the intensity analysis below; it now uses
  ## the same organisation-score column as `r.ideal.clm100_policy.org`.
  r.kwd_Climate.clm100_policy.org <- d.brands %>% 
    mutate(`kwd.Climate/Environment` = log(`kwd.Climate/Environment`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "clm100_policy.Organisation Score.March 2022", y.col = "kwd.Climate/Environment", lbl.col = "yougov_name",
                      x.lab = "Climate Action 100+ Organization Score", y.lab = "log(Usage of Climate/Environment Keywords)",
                      hist.missing = TRUE,
                      x.mid = NA, y.mid = log(LOG_OFFSET), annot.quads = FALSE)
  if (FALSE) {
    r.kwd_Climate.clm100_policy.org$plot
  }
  
  ## Logged climate keyword usage vs. the Climate Action 100+ engagement
  ## intensity score. The assigned name is kept as-is in case it is referenced
  ## elsewhere in the file.
  r.kwd_broad_clm.clm100_policy.intens <- d.brands %>% 
    mutate(`kwd.Climate/Environment` = log(`kwd.Climate/Environment`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "clm100_policy.Engagement Intensity Score.March 2022", y.col = "kwd.Climate/Environment", lbl.col = "yougov_name",
                      x.lab = "Climate Action 100+ Engagement Intensity Score", y.lab = "log(Usage of Climate/Environment Keywords)",
                      hist.missing = TRUE,
                      x.mid = NA, y.mid = log(LOG_OFFSET), annot.quads = FALSE)
  if (FALSE) {
    ## Preview the object assigned above (the old reference used a name that
    ## is never assigned in this section).
    r.kwd_broad_clm.clm100_policy.intens$plot
  }
  
  ## Distribution of logged climate keyword usage grouped by disclosure
  ## indicator 7.1.a; brands without an assessment are dropped.
  r.kwd_Climate.clm100_discl.7_1 <- d.brands %>% 
    mutate(`kwd.Climate/Environment` = log(`kwd.Climate/Environment`+LOG_OFFSET)) %>%
    filter(!is.na(`clm100_discl.7.1 Metric assessment.a.Mar-22`)) %>%
    histogram_plot(x.lab = "log(Usage of Climate/Environment Keywords)",
                   x.col = "kwd.Climate/Environment",
                   group.lab = "Brand commits to conduct all lobbying\nin line with Paris Agreement?",
                   group.col = "clm100_discl.7.1 Metric assessment.a.Mar-22")
  if (FALSE) {
    r.kwd_Climate.clm100_discl.7_1$plot
  }
  
  ### 6.3.6. Regulatory Violations: Discrimination ----
  
  #### scaled text ----
  
  ## `ideal` vs. logged count of discrimination offenses (employment plus
  ## non-employment). Missing `gjf.n_off*` counts are set to 0 before summing.
  r.ideal.gjf.discr.n <- d.brands %>% 
    mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`gjf.n_off.discr`= log(`gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "gjf.n_off.discr", y.col = "ideal", lbl.col = "yougov_name",
                      x.lab = "log(Discrimination Violations)", y.lab = "Rep. Direction of Brand Signal",
                      hist.missing = FALSE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      ## y is `ideal`, not a logged keyword count, so its midpoint is 0
                      ## (as in the lobbying/PAC plots) rather than log(LOG_OFFSET).
                      x.mid = NA, y.mid = 0, annot.quads = FALSE)
  if (FALSE) {
    r.ideal.gjf.discr.n$plot
  }
  
  #### related keywords ----
  
  ## Logged minority group/issue keyword usage vs. the same logged
  ## discrimination offense count. Both families of columns are NA-filled
  ## with 0 before being summed.
  r.kwd_Minorities.gjf.discr.n <- d.brands %>% 
    mutate_at(vars(starts_with("kwd")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`kwd.Minorities` = log(`kwd.Black Community`+`kwd.Asian/AAPI Community`+`kwd.Hispanic/Latino Community`+`kwd.Women`+`kwd.LGBTQ`+`kwd.Racial Justice`+`kwd.Indigenous/Native Community`+LOG_OFFSET)) %>% 
    mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`gjf.n_off.discr`= log(`gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "gjf.n_off.discr", y.col = "kwd.Minorities", lbl.col = "yougov_name",
                      x.lab = "log(Discrimination Violations)", y.lab = "log(Usage of Minority Group/Issues Keywords)",
                      hist.missing = FALSE,
                      x.mid = NA, y.mid = log(LOG_OFFSET), annot.quads = FALSE)
  if (FALSE) {
    r.kwd_Minorities.gjf.discr.n$plot
  }
  
  ### 6.3.7. Regulatory Violations: Labor ----
  ## Interactive check of the available `gjf` columns.
  colnames(d.brands)[grepl("gjf", colnames(d.brands))]
  
  #### scaled text ----
  
  ## `ideal` vs. logged count of labor offenses (wage and hour, labor
  ## relations, workplace safety or health). Missing counts are set to 0.
  r.ideal.gjf.labor.n <- d.brands %>% 
    mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`gjf.n_off.labor` = log(`gjf.n_off.wage and hour violation`+`gjf.n_off.labor relations violation`+`gjf.n_off.workplace safety or health violation`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "gjf.n_off.labor", y.col = "ideal", lbl.col = "yougov_name",
                      x.lab = "log(Labor Violations)", y.lab = "Rep. Direction of Brand Signal",
                      hist.missing = FALSE, align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
                      x.mid = NA, annot.quads = FALSE)
  if (FALSE) {
    r.ideal.gjf.labor.n$plot
  }
  
  #### related keywords ----
  
  ## Logged labor keyword usage vs. logged labor offense count.
  ## NOTE(review): unlike the scaled-text version above, this `gjf.n_off.labor`
  ## also adds the two discrimination offense columns while keeping the
  ## "log(Labor Violations)" axis label -- confirm whether that is intended.
  ## NOTE(review): `kwd.Labor` is logged without the NA -> 0 fill applied to
  ## `kwd*` columns in other keyword analyses -- confirm.
  r.kwd_Labor.gjf.labor.n <- d.brands %>% 
    mutate(`kwd.Labor` = log(`kwd.Labor`+LOG_OFFSET)) %>%
    mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(`gjf.n_off.labor` = log(`gjf.n_off.wage and hour violation`+`gjf.n_off.labor relations violation`+`gjf.n_off.workplace safety or health violation`+`gjf.n_off.employment discrimination`+`gjf.n_off.discriminatory practices (non-employment)`+LOG_OFFSET)) %>%
    measure_alignment(x.col = "gjf.n_off.labor", y.col = "kwd.Labor", lbl.col = "yougov_name",
                      x.lab = "log(Labor Violations)", y.lab = "log(Usage of Labor Keywords)",
                      hist.missing = FALSE,
                      x.mid = NA, y.mid = log(LOG_OFFSET), annot.quads = FALSE)
  if (FALSE) {
    r.kwd_Labor.gjf.labor.n$plot
  }
  
  ### 6.3.8. Regulatory Violations: Environmental ----
  ## Interactive check of the available `gjf` columns.
  colnames(d.brands)[grepl("gjf", colnames(d.brands))]

  #### scaled text ----

  ## `ideal` vs. logged count of environmental offenses (environmental plus
  ## energy conservation). Missing counts are set to 0 before summing.
  r.ideal.gjf.environ.n <- d.brands %>%
    mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(
      `gjf.n_off.environ` = log(
        `gjf.n_off.environmental violation` +
          `gjf.n_off.energy conservation violation` +
          LOG_OFFSET
      )
    ) %>%
    measure_alignment(
      x.col = "gjf.n_off.environ",
      y.col = "ideal",
      lbl.col = "yougov_name",
      x.lab = "log(Environmental Violations)",
      y.lab = "Rep. Direction of Brand Signal",
      hist.missing = FALSE,
      align.condition = ideal.n > MIN_PARTISAN_BIGRAMS,
      x.mid = NA,
      annot.quads = FALSE
    )
  ## Interactive preview only.
  if (FALSE) {
    r.ideal.gjf.environ.n$plot
  }

  #### related keywords ----

  ## Logged climate/environment keyword usage vs. the same offense count.
  r.kwd_Climate.gjf.environ.n <- d.brands %>%
    mutate(`kwd.Climate/Environment` = log(`kwd.Climate/Environment` + LOG_OFFSET)) %>%
    mutate_at(vars(starts_with("gjf.n_off")), ~ifelse(is.na(.x), 0, .x)) %>%
    mutate(
      `gjf.n_off.environ` = log(
        `gjf.n_off.environmental violation` +
          `gjf.n_off.energy conservation violation` +
          LOG_OFFSET
      )
    ) %>%
    measure_alignment(
      x.col = "gjf.n_off.environ",
      y.col = "kwd.Climate/Environment",
      lbl.col = "yougov_name",
      x.lab = "log(Environmental Violations)",
      y.lab = "log(Usage of Climate/Environment Keywords)",
      hist.missing = FALSE,
      x.mid = NA,
      y.mid = log(LOG_OFFSET),
      annot.quads = FALSE
    )
  ## Interactive preview only.
  if (FALSE) {
    r.kwd_Climate.gjf.environ.n$plot
  }

  ## Report elapsed time since `t0`.
  print(Sys.time() - t0)
}

# 7.0.) PREP FOR MEASUREMENT ----------------------------------------------

## Consolidate firm-size measures. For revenue and employee counts, take the
## first non-missing value across sources (the Zippia/Glassdoor revenue
## figures are divided by 1e6 to match `rev_mil`), then log the consolidated
## value. The logs previously used the raw `rev_mil` / `num_empl` columns,
## which discarded every value recovered by coalesce().
d.brands <- d.brands %>%
  mutate(rev_mil.final = coalesce(rev_mil, zippia_rev/1000000, glassdoor_rev_usd/1000000),
         rev_mil.final.log = log(rev_mil.final+LOG_OFFSET),
         num_empl.final = coalesce(num_empl, zippia_num_empl, orbis_num_empl, glassdoor_num_empl),
         num_empl.final.log = log(num_empl.final+LOG_OFFSET)) 

## Two-way low/high splits with (approximately) equal numbers of brands per
## bin. An earlier equal-width cut() of the same columns was immediately
## overwritten by these assignments and has been removed as dead code.
d.brands$num_empl.2 <- cut_number(d.brands$num_empl.final.log, n=2, labels=c("low","high"))
d.brands$rev_mil.2 <- cut_number(d.brands$rev_mil.final.log, n=2, labels=c("low","high"))

d.brands$tw_count.2 <- cut_number(log(d.brands$tw_count), n=2, labels=c("low","high"))
d.brands$tw_followers.2 <- cut_number(log(d.brands$tw_followers), n=2, labels=c("low","high"))

## Interactive check of bin sizes.
table(d.brands$num_empl.2)
table(d.brands$rev_mil.2)

## Recode the disclosure sub-indicators to 1 ("Y" or 1) / 0 (anything else);
## missing values stay NA.
d.brands <- d.brands %>%
  mutate_at(vars(starts_with("clm100_discl.Sub-indicator")), ~ifelse(.x=="Y"|.x==1,1,0))

## Quadrant midpoints keyed by variable name: 0.5 for share/percentage
## variables, 0 for slant / DW-NOMINATE / ideal point variables.
d.brands.quad.mids <- list(
  ## stakeholder and activity measures
  "R_don_share" = 0.5,
  "R_don_share.Board_Member" = 0.5,
  "R_don_share.Managers" = 0.5,
  "R_don_share.Legal" = 0.5,
  "R_don_share.Human_Resources" = 0.5,
  "R_don_share.Top_Exec" = 0.5,
  "R_don_share.Public_Relations" = 0.5,
  "R_don_share.Marketing" = 0.5,
  "R_don_share.Rank_and_File" = 0.5,
  "twitter.foll_ideo_slant" = 0,
  "sl.Rep_Pct.2017_02" = 0.5,
  "sl.Rep_Pct.2022_10" = 0.5,
  "hq_pres.REP" = 0.5,
  "zi.pres.REP" = 0.5,
  "sg.pres.REP" = 0.5,
  "cong_house_dw_mean" = 0,
  "cong_sen_dw_mean" = 0,
  "stkhl.R" = 0.5,
  "legis.R_frac" = 0.5,
  "opsec.R_share.org_dollars" = 0.5,
  "opsec.R_cand_share.org_dollars" = 0.5,
  ## ideal point outcomes
  "ideal.main" = 0,
  "ideal.main.bin" = 0,
  "ideal.tw" = 0,
  "ideal.ig" = 0,
  "ideal.mdl" = 0,
  "ideal.stances" = 0
)

## Non-white employee share is the complement of the white share (out of 100).
d.brands$`zippia_empl.Ethnicity.Non-White` <- 100 - d.brands$`zippia_empl.Ethnicity.White`
d.brands.y$`zippia_empl.Ethnicity.Non-White` <- 100 - d.brands.y$`zippia_empl.Ethnicity.White`

## Missing `hq_main_in_US` values are set to 1.
d.brands$hq_main_in_US <- replace(
  d.brands$hq_main_in_US,
  is.na(d.brands$hq_main_in_US),
  1
)

## Interactive check: employee-count values at the 33rd/66th percentiles,
## back-transformed from the log scale.
exp(quantile(d.brands$num_empl.final.log, probs = c(0.33, 0.66), na.rm = TRUE))

## Collapse `yougov_brand_category` into broader industry groups by regex.
## case_when() takes the first matching branch, so the order of the patterns
## matters; categories matching none of them keep their own name in title case.
## grepl() is case-sensitive here, so the patterns only match lowercase text.
d.brands <- d.brands %>%
  mutate(yougov_brand_category.2 = case_when(
    grepl("(finan|bank|insuran)", yougov_brand_category) ~ "Finance/Insurance",
    grepl("(tool|hardware|applianc)", yougov_brand_category) ~ "Tools/Appliances",
    grepl("(tech|electr)", yougov_brand_category) ~ "Tech/Electronics",
    grepl("(retail|cloth)", yougov_brand_category) ~ "Retail/Clothing",
    ## NOTE(review): "good" may be a typo for "food" given the target label;
    ## as written it matches any category containing "good" -- confirm.
    ## NOTE(review): "drink" is matched here before the Beverages branch below.
    grepl("(good|drink|groc|dining)", yougov_brand_category) ~ "Food/Food Services",
    grepl("(beer|spirit|bever)", yougov_brand_category) ~ "Beverages",
    grepl("(hotel|airline|travel)", yougov_brand_category) ~ "Hospitality",
    # TRUE ~ NA_character_
    TRUE ~ stringr::str_to_title(yougov_brand_category)
  ))

# 7.) MEASURE ALIGNMENT ROBUSTLY ----------------------------------------------

## 7.1. Ideal Points ~ Stakeholders --------------------------------------------

## Specifications: `ideal.main` regressed on each stakeholder measure (all
## `R_don_share*` columns plus follower, shopper, geographic, congressional
## and pooled stakeholder variables), with x and y both scaled.
specifs.ideal.stkhl <- build_specifs(
  data = d.brands,
  x.vars = c(
    grep("^R_don_share", colnames(d.brands), value = TRUE),
    "twitter.foll_ideo_slant",
    "sl.Rep_Pct.2017_02",
    "sl.Rep_Pct.2022_10",
    "hq_pres.REP",
    "zi.pres.REP",
    "sg.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R"
  ),
  y.vars = c("ideal.main"),
  x.mids = d.brands.quad.mids,
  y.mids = d.brands.quad.mids,
  se.cluster.vars = c(),
  scale.y = TRUE,
  scale.x = TRUE
)

## Fit every specification with lm() on brands whose `ideal.main.n` exceeds
## MIN_BIGRAM_COUNT; equivalence tests on, 12 parallel cores requested.
results.ideal.stkhl <- measure_alignments_robustly(
  # data = d.brands,
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs = specifs.ideal.stkhl,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  equiv.test = TRUE,
  parallel.cores = 12
)

### Visualise regression estimates ----
## Coefficient plot for the full-sample, unclustered `ideal.main` results.
## Rows are ordered by estimate and faceted by predictor category.
p.reg.ideal.stkhl <- results.ideal.stkhl %>%
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  mutate(x.name = sanitize_var(x)) %>%
  mutate(x.cat = categorize_var(x)) %>%
  ## NOTE(review): these two steps only relabel "R" as "D" in the display
  ## names; the estimates are not sign-flipped in this pipeline -- confirm
  ## the outcome is already oriented toward the Democratic direction.
  mutate(x.name = gsub("R\\.", "D.", x.name)) %>% ## orient according to Dem signal
  mutate(x.name = gsub("\\% R", "% D.", x.name)) %>% ## orient according to Dem signal
  arrange(estimate) %>%
  mutate(x.name = as_factor(x.name)) %>%
  ## black: below the adjusted alpha; gray25: below the global alpha only.
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  # mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  ggplot(aes(y=x.name, x=estimate, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  ## Thin range: estimate +/- adjusted critical value * SE.
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), 
                 size=1, position = position_dodge(width=0.8)) + 
  ## Thick range: estimate +/- 1.96 * SE.
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), 
                 size=2, position = position_dodge(width=0.8)) + 
  geom_point(fill="white", size=3, position = position_dodge(width=0.8)) +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  ## Direction annotation drawn only in the "Employees" facet.
  geom_text(data = data.frame(x.cat = "Employees"), 
            x=Inf, y=0, size=3, label=TeX("More D. brand signal $\\rightarrow$"), 
            hjust=1, vjust=-1, inherit.aes=F) +
  scale_y_discrete(name = "") +
  scale_color_identity() +
  scale_shape_manual(values = 21:24) +
  theme_custom +
  theme(legend.position="none",
        strip.text.y=element_text(angle=0))
ggsave_v(p.reg.ideal.stkhl, 
         filename = "figures/reg/stkhl.pdf",
         width=8, height=4)

### Visualise influence statistics ----
## Plots `zaminfl.both.change.prop_drop` for each predictor (full-sample,
## unclustered `ideal.main` rows), ordered by value and faceted by category.
p.zaminfl.pct.ideal.stkhl <- results.ideal.stkhl %>%
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  select(y, x, zaminfl.both.change.prop_drop) %>%
  #select(y, x, zaminfl.sign.change.prop_drop, zaminfl.sig.change.prop_drop, zaminfl.both.change.prop_drop) %>%
  ## Long format (one row per statistic type) so more statistics can be added.
  gather(key="type", value="value", -y, -x) %>%
  mutate(x.name = sanitize_var(x)) %>%
  ## Newlines in category names are replaced by spaces for the facet strips.
  mutate(x.cat = gsub("\n"," ", categorize_var(x))) %>%
  arrange(value) %>%
  mutate(x.name=as_factor(x.name)) %>%  
  filter(!is.na(value)) %>%
  ggplot(aes(y=x.name, x=value)) +
  #ggplot(aes(y=x, x=value, group=type, color=type)) +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  geom_vline(xintercept=0, lty=2) +
  ## Each range runs from 0 to the value.
  geom_pointrange(aes(x=value, xmin=0, xmax=value), position=position_dodge(width=0.5), size=1) +
  scale_x_continuous(labels = scales::percent_format(1), name = "% of influential observations dropped to significantly change results") +
  scale_y_discrete(name = "") +
  theme_custom +
  theme(legend.position="none",
        strip.text.y=element_text(angle=0))
ggsave_v(p.zaminfl.pct.ideal.stkhl, 
         filename = "figures/reg/stkhl_zaminfl.pdf",
         width=8, height=4)

### Visualise equivalence tests ----
## Estimates with their `tost.0.20.u` 95% intervals, shown against dashed
## reference lines at -0.20 and +0.20.
p.tost.ideal.stkhl <- results.ideal.stkhl %>%
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  mutate(x.name = sanitize_var(x)) %>%
  arrange(estimate) %>%
  mutate(x.name=as_factor(x.name)) %>%   
  mutate(x.cat = gsub("\n"," ", categorize_var(x))) %>%
  ## NOTE(review): `tost.0.20.pval` lacks the `.u.` infix used by the other
  ## TOST columns referenced here -- confirm the column exists under this name.
  mutate(color = case_when(tost.0.20.u.p.value.adj.sig ~ "black",
                           tost.0.20.pval < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ggplot(aes(y=x.name, x=estimate, color=color)) +
  geom_vline(xintercept=-0.20, lty=2) +
  geom_vline(xintercept=0.20, lty=2) +
  geom_linerange(aes(xmin=tost.0.20.u.ci.95.lwr, xmax=tost.0.20.u.ci.95.upr), size=3, alpha=0.8) + 
  geom_point(size=3, shape=21, fill="white") +
  #ggtitle("significant p-value = no meaningful difference (low threshold)") +
  #facet_grid(~ y, scales = "free") +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  scale_x_continuous(name = TeX("Coefficient estimate relative to 95% TOST interval")) +
  scale_y_discrete(name = "") +  
  scale_color_identity() +
  theme_custom +
  theme(legend.position="none",
        strip.text.y=element_text(angle=0))
ggsave_v(p.tost.ideal.stkhl, 
         filename = "figures/reg/stkhl_tost.pdf",
         width=8, height=4)

### Visualise quadrant statistics ----
## Bars from 50% to `quad.align.pct.mean` for each predictor, with error bars
## from the 95% interval columns and a dashed reference line at 50%.
p.quad.ideal.stkhl <- results.ideal.stkhl %>%
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  mutate(x.name = sanitize_var(x)) %>%
  arrange(estimate) %>%
  mutate(x.name=as_factor(x.name)) %>%
  mutate(x.cat = categorize_var(x)) %>%
  # mutate(color = ifelse(quad.binom1s.pval < GLOBAL_ALPHA_THRESHOLD, "black", "gray")) %>%
  ## Colour value is pinned to 0.5 (the gradient midpoint, grey) whenever the
  ## 95% interval contains 0.5; otherwise it is the mean itself.
  mutate(color = ifelse(quad.align.pct.ci.95.lwr < 0.5 & quad.align.pct.ci.95.upr > 0.5, 0.5, quad.align.pct.mean)) %>%
  ggplot(aes(y=x.name)) +
  geom_segment(aes(x=0.5, xend=quad.align.pct.mean, y=x.name, yend=x.name, color=color), 
               stat="identity", size=5) +
  geom_errorbar(aes(x=quad.align.pct.mean, xmin=quad.align.pct.ci.95.lwr, xmax=quad.align.pct.ci.95.upr), width=0.25, color="black") +
  geom_vline(xintercept=0.5, lty=2) +
  #facet_grid(subset.var.val ~ y, scales = "free") +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  scale_color_gradient2(low="purple", mid="grey", high="darkgreen", midpoint = 0.5) +
  scale_x_continuous(labels = scales::percent_format(1), name = "% of brands with on-quadrant partisan signals") +
  scale_y_discrete(name = "") +
  theme_custom +
  theme(legend.position="none",
        strip.text.y=element_text(angle=0))
ggsave_v(p.quad.ideal.stkhl, 
         filename = "figures/reg/stkhl_quad.pdf",
         width=8, height=4)

### Visualise combined estimates ----
## Coefficient plot (left) next to the quadrant plot (right). The left panel
## hides its facet strips and the right panel hides its y-axis text so the two
## share one set of row labels; margins are tightened to close the gap.
p.cmbd.ideal.stkhl <- cowplot::plot_grid(
  p.reg.ideal.stkhl +
    theme(
      strip.background = element_blank(),
      strip.text = element_text(size = 0),
      plot.margin = unit(c(0, -0.3, 0, 0), "cm")
    ),
  p.quad.ideal.stkhl +
    scale_x_continuous(
      limits = c(0, 1),
      labels = scales::percent_format(1),
      name = "% on-quadrant brands"
    ) +
    theme(
      axis.text.y = element_text(size = 0),
      plot.margin = unit(c(0, 0.1, 0, -0.3), "cm")
    ),
  nrow = 1,
  rel_widths = c(0.5, 0.45),
  align = "h"
)
ggsave_v(
  p.cmbd.ideal.stkhl,
  filename = "figures/reg/stkhl_cmbd.pdf",
  width = 10,
  height = 4
)

## 7.2. Sign(Ideal Points) ~ Stakeholders --------------------------------------

## Specifications with `sign.y = TRUE` and `sign.x = FALSE`: `ideal.main`
## against each stakeholder measure, additionally subset by `num_empl.2`.
specifs.ideal.sign.stkhl <- build_specifs(
  data = d.brands,
  x.vars = c(
    grep("^R_don_share", colnames(d.brands), value = TRUE),
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "sg.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R"
  ),
  y.vars = c("ideal.main"),
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = d.brands.quad.mids,
  se.cluster.vars = c(),
  subset.vars = c("num_empl.2"),
  scale.y = TRUE,
  scale.x = TRUE,
  sign.y = TRUE,
  sign.x = FALSE
)

## Fit on all of `d.brands`; quadrant statistics and equivalence tests off.
results.ideal.sign.stkhl <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.sign.stkhl,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  parallel.cores = 12
)

### Visualise regression estimates ----
## Coefficient plot comparing the unbinarized results (`results.ideal.stkhl`)
## with the binarized-outcome results, dodged and shaped by `outcome`.
## NOTE(review): the unbinarized results were fit on brands filtered by
## `ideal.main.n > MIN_BIGRAM_COUNT`, the binarized ones on all of `d.brands`
## -- confirm the samples are meant to differ.
p.reg.ideal.sign.stkhl <- bind_rows(results.ideal.stkhl %>% 
                                      mutate(outcome = "Unbinarized"),
                                    results.ideal.sign.stkhl %>%
                                      mutate(outcome = "Binarized")) %>%
  ## Full-sample, unclustered `ideal.main` rows only. (An identical second
  ## filter further down the chain was a no-op and has been removed.)
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  mutate(x.name = sanitize_var(x)) %>%
  arrange(estimate) %>%
  mutate(x.name=as_factor(x.name)) %>%
  ## black: below the adjusted alpha; gray otherwise.
  mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  # mutate(color = ifelse(p.value < 0.05, "black", "gray")) %>%
  ggplot(aes(y=x.name, x=estimate, color=color, group=outcome, shape=outcome)) +
  geom_vline(xintercept=0, lty=2) +
  ## Thin range: estimate +/- adjusted critical value * SE.
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), 
                 size=0.5, position = position_dodge(width=0.5)) + 
  ## Thick range: estimate +/- 1.96 * SE.
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), 
                 size=1, position = position_dodge(width=0.5)) + 
  geom_point(fill="white", size=3, position = position_dodge(width=0.5)) +
  #facet_grid(subset.var.val ~ y, scales = "free") +
  scale_color_identity() +
  scale_shape_manual(values = 21:24, name="Outcome:") +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  scale_y_discrete(name = "") +  
  theme_custom +
  theme(legend.position=c(0.87, 0.1),
        legend.background=element_rect(fill = "white", color = "black"))

## 7.3. Sign(Ideal Points) ~ Sign(Stakeholders) --------------------------------

## Specifications with both `sign.y` and `sign.x` set to TRUE, covering all
## six ideal point outcomes and subset by `num_empl.2`.
specifs.ideal.sign.stkhl.sign <- build_specifs(
  data = d.brands,
  x.vars = c(
    grep("^R_don_share", colnames(d.brands), value = TRUE),
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "sg.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R"
  ),
  y.vars = c(
    "ideal.main", "ideal.main.bin", "ideal.tw",
    "ideal.ig", "ideal.mdl", "ideal.stances"
  ),
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = list(
    "ideal.main" = 0,
    "ideal.main.bin" = 0,
    "ideal.tw" = 0,
    "ideal.ig" = 0,
    "ideal.mdl" = 0,
    "ideal.stances" = 0
  ),
  se.cluster.vars = c(),
  subset.vars = c("num_empl.2"),
  scale.y = TRUE,
  scale.x = TRUE,
  sign.y = TRUE,
  sign.x = TRUE
)

## Fit on all of `d.brands`; quadrant statistics and equivalence tests off.
results.ideal.sign.stkhl.sign <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.sign.stkhl.sign,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  quad.stats = FALSE,
  parallelize = TRUE,
  equiv.test = FALSE,
  parallel.cores = 12
)

### Visualise regression estimates ----
## Coefficient plot comparing the unbinarized results (`results.ideal.stkhl`)
## with the results where outcome and predictors are both binarized, dodged
## and shaped by `vars`.
p.reg.ideal.sign.stkhl.sign <- bind_rows(results.ideal.stkhl %>% 
                                           mutate(vars = "Unbinarized"),
                                         results.ideal.sign.stkhl.sign %>%
                                           mutate(vars = "Binarized")) %>%
  ## Full-sample, unclustered `ideal.main` rows only. (An identical second
  ## filter further down the chain was a no-op and has been removed.)
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  mutate(x.name = sanitize_var(x)) %>%
  arrange(estimate) %>%
  mutate(x.name=as_factor(x.name)) %>%
  ## black: below the adjusted alpha; gray otherwise.
  mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  # mutate(color = ifelse(p.value < 0.05, "black", "gray")) %>%
  ggplot(aes(y=x.name, x=estimate, color=color, group=vars, shape=vars)) +
  geom_vline(xintercept=0, lty=2) +
  ## Thin range: estimate +/- adjusted critical value * SE.
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), 
                 size=0.5, position = position_dodge(width=0.5)) + 
  ## Thick range: estimate +/- 1.96 * SE.
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), 
                 size=1, position = position_dodge(width=0.5)) + 
  geom_point(fill="white", size=3, position = position_dodge(width=0.5)) +
  #facet_grid(subset.var.val ~ y, scales = "free") +
  scale_color_identity() +
  scale_shape_manual(values = 21:24, name="Outcome + Predictors:") +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  scale_y_discrete(name = "") +  
  theme_custom +
  theme(legend.position=c(0.8, 0.12),
        legend.background=element_rect(fill = "white", color = "black"))

## 7.4. Ideal Points ~ Stakeholders (bootstrapped) -----------------------------

## Specifications for regressing the parametric ideal point (`ideal.mdl`) on
## each stakeholder measure: every `R_don_share*` column plus the follower,
## presidential-vote, congressional and combined-stakeholder variables.
## Both sides are standardized.
specifs.boot.ideal.mdl.stkhl <- build_specifs(
  data = d.brands,
  x.vars = c(colnames(d.brands)[grepl("^R_don_share", colnames(d.brands))],
             "twitter.foll_ideo_slant",
             "hq_pres.REP", "zi.pres.REP", "sg.pres.REP",
             "cong_house_dw_mean", "cong_sen_dw_mean",
             "stkhl.R"),
  y.vars = c("ideal.mdl"),
  # x.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  ## Was a second `x.mids =`, which R rejects ("formal argument matched by
  ## multiple actual arguments"); the outcome midpoint belongs in `y.mids`,
  ## as in the other build_specifs() calls in this file.
  y.mids = list("ideal.mdl" = 0),
  se.cluster.vars = c(NA),
  scale.y = TRUE,
  scale.x = TRUE
)

## Brand covariates with every `ideal*` column removed, joined to the
## bootstrapped parametric estimates (`slant` renamed to `ideal.mdl`, plus the
## `boot` column). Rows lacking a bootstrap id or an estimate are dropped.
d.brands.boot.ideal.mdl <- full_join(
  d.brands %>% select(-starts_with("ideal")),
  scaled_text_alt$par_boot_mdl$brands_bycategory_bigrams_pois_boots %>%
    select(yougov_name, ideal.mdl = slant, boot),
  by = "yougov_name"
) %>%
  filter(!is.na(boot), !is.na(ideal.mdl))

## Fit the stakeholder specifications on the bootstrapped data, serially.
## ETA: 20 min
results.boot.ideal.mdl.stkhl <- measure_bootstrapped_alignments_robustly(
  data.boots.x        = d.brands.boot.ideal.mdl,
  lm.specifs          = specifs.boot.ideal.mdl.stkhl,
  lm.func             = lm,
  debug               = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize         = FALSE
  ## To run in parallel, set parallelize = TRUE and add e.g.
  ## parallel.cores = parallel::detectCores() or parallel.cores = 8
)

## Summarise the bootstrapped fits into quantities of interest.
## Rows sharing (y, x, c, w, se.clus, formula) -- presumably one row per
## bootstrap draw; confirm against measure_bootstrapped_alignments_robustly() --
## are collapsed into the mean and the 2.5% / 97.5% quantiles of each statistic,
## producing columns named `<stat>_mean`, `<stat>_ci.95.upr`, `<stat>_ci.95.lwr`.
## NOTE(review): mean() and quantile() are called without na.rm, so a missing
## value in any listed column propagates (mean) or errors (quantile).
results.boot.ideal.mdl.stkhl.qoi <- results.boot.ideal.mdl.stkhl %>%
  group_by(y, x, c, w, se.clus, formula) %>%
  summarise_at(c("estimate","std.error","p.value","n.obs",
                 "quad.chisq.pval","quad.binom1s.pval","quad.align.pct.mean"), 
               list(mean = mean, 
                    ci.95.upr = function(.) quantile(.,0.975),
                    ci.95.lwr = function(.) quantile(.,0.025))) %>%
  ## Rank-based thresholds are computed separately for each outcome `y`.
  group_by_at(c("y")) %>%
  ## arrange() is not given .by_group, so the sort is global; rows within each
  ## `y` group still end up in ascending p-value order, which is what the rank
  ## `r` below relies on.
  arrange(p.value_mean) %>%
  mutate(p.value.adj.method = "BHq",
         k = n(), ## number of hypotheses
         r = 1:n(), ## rank of p-values
         p.value.adj.alpha = (r*GLOBAL_ALPHA_THRESHOLD)/k, ## stepped-up thresholds
         p.value.adj.sig = p.value_mean < p.value.adj.alpha, ## stepped-up hypothesis tests
         p.value.adj.zcrit = qnorm(1 - (p.value.adj.alpha)/2), ## new critical values for asymptotic CIs
         p.value.adj.tcrit = qt(1 - (p.value.adj.alpha)/2, n.obs_mean-1) ## new critical values for small-sample CIs
  ) %>%
  ## k and r are scratch columns; the result stays grouped by `y`.
  select(-k, -r)

### Visualise regression estimates ----
## Coefficient plot of the bootstrap-averaged estimates (not saved to disk).
## Thin bars use the adjusted critical value, thick bars use 1.96.
results.boot.ideal.mdl.stkhl.qoi %>%
  filter(is.na(se.clus)) %>%
  mutate(color = ifelse(p.value_mean < p.value.adj.alpha, "black", "gray")) %>%
  ggplot(aes(y = x, x = estimate_mean, color = color)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(
    aes(xmin = estimate_mean - p.value.adj.tcrit * std.error_mean,
        xmax = estimate_mean + p.value.adj.tcrit * std.error_mean),
    size = 1
  ) +
  geom_linerange(
    aes(xmin = estimate_mean - 1.96 * std.error_mean,
        xmax = estimate_mean + 1.96 * std.error_mean),
    size = 2
  ) +
  geom_point(size = 3) +
  facet_grid(~ y, scales = "free") +
  scale_color_identity() +
  theme_custom

## 7.5. Ideal Points ~ Activities ----------------------------------------

## Specifications for regressing the main ideal point (`ideal.main`) on each
## firm-activity measure: PAC donation shares, workforce composition, workplace
## ratings, climate disclosure/policy scores and logged offense counts.
## Lines commented out inside `x.vars` are predictors deliberately excluded
## from this run. Both sides are standardized; no clustering, no subsetting.
specifs.ideal.actv <- build_specifs(
  data = d.brands,
  x.vars = c(#"legis.R_frac",
             "opsec.R_share.org_dollars",
             "opsec.R_cand_share.org_dollars",
             # "yougov_aud.gender.Female", 
             "zippia_empl.Ethnicity.Non-White",
             "zippia_empl.Genders.Female",
             "hrc_rating", 
             "gd.Race / Ethnicity.Black or African American.avg_rating",
             "glassdoor_Rating.Diversity and Inclusion",
             "gd.Gender.Women.avg_rating",
             "gd.Sexual Orientation.LGBTQ+.avg_rating",
             "cdp_avg_score",
             ## every column matching sub-indicators 5-8 with "Mar" ... "21" in its name
             colnames(d.brands)[grepl("clm100_discl.Sub-indicator (5|6|7|8).*Mar.*21",colnames(d.brands))],
             # "clm100_discl.7.1 Metric assessment.a.Mar-22", # Brand commits to conduct all lobbying\nin line with Paris Agreement?
             "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
             "gjf.n_off.discr",
             #"gjf.n_off.labor",
             "gjf.n_off.environ"),
  y.vars = c("ideal.main"),
  # y.vars = paste0("ideal.",idealpt.vars),
  ## the same midpoint object is supplied for both predictors and outcomes
  x.mids = d.brands.quad.mids,
  y.mids = d.brands.quad.mids,
  se.cluster.vars = NA,
  #subset.vars = c("num_empl.2"),
  scale.y = TRUE,
  scale.x = TRUE
)

## Fit the activity specifications on brands whose `ideal.main.n` exceeds
## MIN_BIGRAM_COUNT; the `gjf.n_off*` offense counts are logged (with
## LOG_OFFSET added) before fitting. Equivalence tests are requested.
results.ideal.actv <- measure_alignments_robustly(
  data = d.brands %>%
    filter(ideal.main.n > MIN_BIGRAM_COUNT) %>%
    mutate_at(vars(starts_with("gjf.n_off")), ~log(.x+LOG_OFFSET)),
  lm.specifs          = specifs.ideal.actv,
  lm.func             = lm,
  p.adjust.group.vars = c("y", "subset.var.val"),
  debug               = FALSE,
  quad.stats          = FALSE,
  parallelize         = FALSE,
  equiv.test          = TRUE
)

### Visualise regression estimates ----
## Coefficient plot for the activity regressions, faceted by activity category
## and saved to figures/reg/actv.pdf.
## Estimates are oriented towards a Democratic signal: every coefficient is
## negated except those whose sanitized name contains "PAC", which are kept
## as-is and relabelled from "R." to "D.".
p.reg.ideal.actv <- results.ideal.actv %>%
  filter(
    is.na(se.clus),
    !is.na(estimate),
    subset.var.val == "all",
    y == "ideal.main"
  ) %>%
  mutate(
    x.name = sanitize_var(x, add_category = FALSE),
    x.cat = categorize_var(x, multi_line = TRUE),
    ## `estimate` is flipped using the still-unrenamed x.name ...
    estimate = case_when(
      grepl("PAC", x.name) ~ estimate,
      TRUE ~ -estimate
    ),
    ## ... and only then are the PAC labels rewritten
    x.name = case_when(
      grepl("PAC", x.name) ~ gsub("R\\.", "D.", x.name),
      TRUE ~ x.name
    )
  ) %>%
  ## sort before as_factor() so the y-axis follows the oriented estimates
  arrange(estimate) %>%
  mutate(
    x.name = as_factor(x.name),
    ## fold several helper-assigned categories into two display panels
    x.cat = case_when(
      grepl("(Employment|Workplace|Firm)", x.cat) ~ "Diversity, Equity,\nand Inclusion",
      grepl("Discrimination Offenses", x.name) ~ "Diversity, Equity,\nand Inclusion",
      grepl("Environmental Violations", x.name) ~ "Climate\nPolicy",
      TRUE ~ x.cat
    ),
    ## black: below adjusted threshold; grey40: below global alpha only
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "grey40",
      TRUE ~ "gray"
    )
  ) %>%
  ggplot(aes(y = x.name, x = estimate, color = color,
             group = subset.var.val, shape = subset.var.val)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(
    aes(xmin = estimate - p.value.adj.tcrit * std.error,
        xmax = estimate + p.value.adj.tcrit * std.error),
    size = 1, position = position_dodge(width = 0.8)
  ) +
  geom_linerange(
    aes(xmin = estimate - 1.96 * std.error,
        xmax = estimate + 1.96 * std.error),
    size = 2, position = position_dodge(width = 0.8)
  ) +
  geom_point(fill = "white", size = 3, position = position_dodge(width = 0.8)) +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  scale_y_discrete(name = "") +
  scale_x_continuous(name = TeX("Coefficient estimate"),
                     limits = c(-0.75, 0.75)) +
  ## direction hint, drawn only in the "Political\nActivities" panel
  geom_text(data = data.frame(x.cat = "Political\nActivities"),
            x = Inf, y = 0, size = 3,
            label = TeX("More D. brand signal $\\rightarrow$"),
            hjust = 1, vjust = -1, inherit.aes = FALSE) +
  scale_color_identity() +
  theme_custom +
  theme(legend.position = "none",
        strip.text.y = element_text(angle = 0))
ggsave_v(p.reg.ideal.actv,
         filename = "figures/reg/actv.pdf",
         width = 10, height = 4.5)

### Visualise influence statistics ----
## Influence plot for the activity regressions: one point-range per predictor
## running from 0 to `zaminfl.both.change.prop_drop`, faceted by category and
## saved to figures/reg/actv_zaminfl.pdf.
p.zaminfl.pct.ideal.actv <- results.ideal.actv %>%
  filter(is.na(se.clus), subset.var.val == "all", y == "ideal.main") %>%
  select(y, x, zaminfl.both.change.prop_drop) %>%
  ## long format: one row per (y, x, statistic)
  gather(key = "type", value = "value", -y, -x) %>%
  mutate(x.name = sanitize_var(x, add_category = FALSE)) %>%
  mutate(x.cat = categorize_var(x)) %>%
  mutate(x.cat = case_when(
    grepl("(Employment|Workplace|Firm)", x.cat) ~ "Diversity, Equity,\nand Inclusion",
    grepl("Discrimination Offenses", x.name) ~ "Diversity, Equity,\nand Inclusion",
    grepl("Environmental Violations", x.name) ~ "Climate\nPolicy",
    TRUE ~ x.cat
  )) %>%
  ## order the y-axis by the plotted value
  arrange(value) %>%
  mutate(x.name = as_factor(x.name)) %>%
  filter(!is.na(value)) %>%
  ggplot(aes(y = x.name, x = value)) +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  geom_vline(xintercept = 0, lty = 2) +
  geom_pointrange(aes(x = value, xmin = 0, xmax = value),
                  position = position_dodge(width = 0.5), size = 1) +
  scale_x_continuous(
    labels = scales::percent_format(1),
    name = "% of influential observations dropped to significantly change results"
  ) +
  scale_y_discrete(name = "") +
  theme_custom +
  theme(legend.position = "none",
        strip.text.y = element_text(angle = 0, size = 6))
ggsave_v(p.zaminfl.pct.ideal.actv,
         filename = "figures/reg/actv_zaminfl.pdf",
         width = 8.25, height = 3)

### Visualise equivalence tests ----
## Equivalence-test plot for the activity regressions: each point estimate is
## drawn against its `tost.0.20.u.ci.95.*` interval, with dashed reference
## lines at -0.20 and +0.20. Saved to figures/reg/actv_tost.pdf.
p.tost.ideal.actv <- results.ideal.actv %>%
  filter(is.na(se.clus), !is.na(estimate), subset.var.val == "all", y == "ideal.main") %>%
  mutate(x.name = sanitize_var(x, add_category = FALSE)) %>%
  ## sort before as_factor() so the y-axis follows the estimates
  arrange(estimate) %>%
  mutate(x.name = as_factor(x.name)) %>%   
  mutate(x.cat = categorize_var(x)) %>%
  ## NOTE(review): this recode folds any category matching "Regulat" into the
  ## DEI panel, whereas the two sibling activity plots assign panels by x.name
  ## ("Discrimination Offenses" -> DEI, "Environmental Violations" -> Climate).
  ## Confirm the difference is intended.
  mutate(x.cat = case_when(
    grepl("(Employment|Workplace|Firm|Regulat)", x.cat) ~ "Diversity, Equity,\nand Inclusion",
    TRUE ~ x.cat
  )) %>%  
  ## black: adjusted TOST flag is TRUE; gray25: raw TOST p-value below global alpha
  mutate(color = case_when(tost.0.20.u.p.value.adj.sig ~ "black",
                           tost.0.20.pval < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  # mutate(tost.0.20.u.ci.95.lwr = ifelse(p.value < p.value.adj.alpha, NA, tost.0.20.u.ci.95.lwr),
  #        tost.0.20.u.ci.95.upr = ifelse(p.value < p.value.adj.alpha, NA, tost.0.20.u.ci.95.upr),
  #        color = ifelse(p.value < p.value.adj.alpha, "gray", color)) %>%
  ggplot(aes(y=x.name, x=estimate, color=color)) +
  geom_vline(xintercept=-0.20, lty=2) +
  geom_vline(xintercept=0.20, lty=2) +
  geom_linerange(aes(xmin=tost.0.20.u.ci.95.lwr, xmax=tost.0.20.u.ci.95.upr), size=3, alpha=0.8) + 
  geom_point(size=3, shape=21, fill="white") +
  #ggtitle("significant p-value = no meaningful difference (low threshold)") +
  #facet_grid(~ y, scales = "free") +
  facet_grid(x.cat ~ ., scales = "free", space = "free") +
  scale_x_continuous(name = TeX("Coefficient estimate relative to 95% TOST interval")) +
  scale_y_discrete(name = "") +  
  scale_color_identity() +
  theme_custom +
  theme(legend.position="none",
        strip.text.y=element_text(angle=0))
ggsave_v(p.tost.ideal.actv, 
         filename = "figures/reg/actv_tost.pdf",
         width=8.25, height=4)

## 7.6. Sign(Ideal Points) ~ Activities ----------------------------------

## Specifications for the sign-based activity regressions: six ideal-point
## outcomes against the activity measures, with `sign.y` / `sign.x` switched on
## and results additionally split by `num_empl.2`.
## Every outcome midpoint is set to 0; predictor midpoints come from
## `d.brands.quad.mids`.
specifs.ideal.sign.actv <- build_specifs(
  data = d.brands,
  x.vars = c(#"legis.R_frac",
             "opsec.R_share.org_dollars",
             "opsec.R_cand_share.org_dollars",
             "yougov_aud.gender.Female", 
             "zippia_empl.Ethnicity.White",
             "hrc_rating", 
             "gd.Race / Ethnicity.Black or African American.avg_rating",
             "glassdoor_Rating.Diversity and Inclusion",
             "gd.Gender.Women.avg_rating",
             "gd.Sexual Orientation.LGBTQ+.avg_rating",
             "cdp_avg_score",
             ## only columns whose name matches "Sub-indicator.7" are included here
             colnames(d.brands)[grepl("clm100_discl.Sub-indicator.7",colnames(d.brands))],
             # colnames(d.brands)[grepl("clm100_discl.Sub-indicator",colnames(d.brands))],
             # "clm100_discl.7.1 Metric assessment.a.Mar-22", # Brand commits to conduct all lobbying\nin line with Paris Agreement?
             "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
             "gjf.n_off.discr",
             "gjf.n_off.labor",
             "gjf.n_off.environ"),
  y.vars = c("ideal.main","ideal.main.bin","ideal.tw","ideal.ig","ideal.mdl","ideal.stances"),
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = list("ideal.main"=0,"ideal.main.bin"=0,"ideal.tw"=0,"ideal.ig"=0,"ideal.mdl"=0,"ideal.stances"=0),
  se.cluster.vars = NA,
  subset.vars = c("num_empl.2"),
  scale.y = TRUE,
  scale.x = TRUE,
  sign.y = TRUE,
  sign.x = TRUE
)

## Fit the sign-based activity specifications on all brands; the `gjf.n_off*`
## offense counts are logged (with LOG_OFFSET added) first.
results.ideal.sign.actv <- measure_alignments_robustly(
  data = d.brands %>%
    mutate_at(vars(starts_with("gjf.n_off")), ~log(.x+LOG_OFFSET)),
  lm.specifs          = specifs.ideal.sign.actv,
  lm.func             = lm,
  debug               = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  quad.stats          = FALSE,
  parallelize         = FALSE,
  equiv.test          = TRUE
)

### Visualise regression estimates ----
## Coefficient plot for the sign-based activity regressions (not saved to
## disk): one column per outcome, one row per `subset.var.val`.
results.ideal.sign.actv %>%
  filter(
    is.na(se.clus),
    !is.na(estimate),
    !grepl("clm100_discl.Sub-indicator (1|2|3|4|8|9)", x)
  ) %>%
  mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  ggplot(aes(y = x, x = estimate, color = color,
             group = subset.var.val, shape = subset.var.val)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(
    aes(xmin = estimate - p.value.adj.tcrit * std.error,
        xmax = estimate + p.value.adj.tcrit * std.error),
    size = 1, position = position_dodge(width = 0.8)
  ) +
  geom_linerange(
    aes(xmin = estimate - 1.96 * std.error,
        xmax = estimate + 1.96 * std.error),
    size = 2, position = position_dodge(width = 0.8)
  ) +
  geom_point(fill = "white", size = 3, position = position_dodge(width = 0.8)) +
  facet_grid(subset.var.val ~ y, scales = "free") +
  scale_color_identity() +
  scale_shape_manual(values = 21:24) +
  theme_custom +
  theme(legend.position = "top")

## 7.7. Ideal Points ~ Activities (bootstrapped) -------------------------

## Placeholder: this section defines no specifications or fits; the object is
## just an empty data frame.
specifs.boot.ideal.mdl.actv <- data.frame()

## 7.8. Ideal Points ~ Other Covariates ----------------------------------

## Consumer covariates: every `yougov_pct*` column plus the `yougov_aud*`
## columns for income, education, gender, region and age, each regressed
## against `ideal.main` on the full brand sample.
specifs.ideal.other.consumers <- build_specifs(
  data = d.brands,
  x.vars = c(
    colnames(d.brands)[grepl("^(yougov_pct)", colnames(d.brands))],
    # "legis.R_frac",
    colnames(d.brands)[grepl("^(yougov_aud).*(Income|educ|gender|region|age)", colnames(d.brands))]
  ),
  y.vars = c("ideal.main"),
  se.cluster.vars = c(NA),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.other.consumers <- measure_alignments_robustly(
  data                = d.brands,
  lm.specifs          = specifs.ideal.other.consumers,
  lm.func             = lm,
  debug               = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  quad.stats          = FALSE,
  parallelize         = FALSE,
  equiv.test          = FALSE
)
#
## Employee covariates: every `zippia_empl*` column for gender, degree,
## ethnicity and age, each regressed against `ideal.main`.
## Both the specifications and the fit use brands whose `ideal.main.n` exceeds
## MIN_BIGRAM_COUNT.
specifs.ideal.other.empl <- build_specifs(
  # data = d.brands,
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  x.vars = c(colnames(d.brands)[grepl("^(zippia_empl).*(Gender|Degree|Ethn|Age)", colnames(d.brands))]),
  y.vars = c("ideal.main"), se.cluster.vars = c(NA), scale.y = TRUE, scale.x = TRUE
)
results.ideal.other.empl <- measure_alignments_robustly(
  # data = d.brands,
  ## Previously fit on unfiltered `d.brands` although the specifications above
  ## were built on the filtered sample; now matches the firm-level block.
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs = specifs.ideal.other.empl,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  quad.stats = FALSE, parallelize = FALSE, equiv.test = FALSE
)
#
## Firm-level covariates (revenue, HQ location, lobbying share, YouGov
## percentages, Twitter activity, headcount), each regressed against
## `ideal.main` on brands whose `ideal.main.n` exceeds MIN_BIGRAM_COUNT.
specifs.ideal.other.firm <- build_specifs(
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  x.vars = c(
    "rev_mil.final.log",
    "hq_main_in_US",
    "legis.R_frac",
    colnames(d.brands)[grepl("yougov.*pct", colnames(d.brands))],
    "tw_count",
    "tw_followers",
    "num_empl.final.log"
  ),
  y.vars = c("ideal.main"),
  se.cluster.vars = c(NA),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.other.firm <- measure_alignments_robustly(
  data                = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs          = specifs.ideal.other.firm,
  lm.func             = lm,
  debug               = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  quad.stats          = FALSE,
  parallelize         = FALSE,
  equiv.test          = FALSE
)


### Visualise regression estimates ----

## Coefficient plot for the consumer covariates, one facet per variable type,
## saved to figures/reg/other_cons.pdf.
## Panel and row labels are derived from the predictor name itself: the name is
## split on "." and its 2nd and 3rd pieces become `var_type` and `var_val`.
p.ideal.other.consumers <- results.ideal.other.consumers %>%
  ## drop net-opinion and male-share predictors from the display
  filter(is.na(se.clus), !grepl("(net_opin|gender.Male)", x)) %>%
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "grey50",
                           TRUE ~ "gray")) %>%
  ## rowwise so that strsplit(...)[[1]] is evaluated on one name at a time
  rowwise() %>%
  mutate(var_type = strsplit(x, split="\\.")[[1]][2],
         var_val = strsplit(x, split="\\.")[[1]][3]) %>%  
  ## tidy the value label: underscores to spaces, drop any "(...)" suffix, and
  ## give the three YouGov percentage variables readable names
  mutate(var_val = gsub("_"," ",var_val),
         var_val = gsub("\\(.*","",var_val),
         var_val = case_when(
           grepl("pct_recog", x, ignore.case=T) ~ "Recognition",
           grepl("pct_pos", x, ignore.case=T) ~ "Positive Opinion",
           grepl("pct_neg", x, ignore.case=T) ~ "Negative Opinion",
           TRUE ~ var_val
         ),
         var_type = case_when(
           grepl("Educ", var_type, ignore.case=T) ~ "Educ. Status",
           grepl("Empl", var_type, ignore.case=T) ~ "Empl. Status",
           grepl("pct", x, ignore.case=T) ~ "Overall",
           TRUE ~ var_type
         ),
         var_type = paste0("% ", stringr::str_to_title(var_type))) %>%
  ## group_by() replaces the rowwise grouping; rows are then sorted by
  ## descending estimate before var_val is turned into a factor
  group_by(var_type) %>%
  arrange(-estimate) %>%
  mutate(var_val = as_factor(var_val)) %>%
  ggplot(aes(y=var_val, x=estimate, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), size=1) + 
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), size=2) + 
  geom_point(size=3) +
  facet_grid(var_type ~ ., scales = "free", space = "free_y") +
  ylab("") +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  ## direction hint, drawn only in the "% Region" panel
  geom_text(data = data.frame(var_type = "% Region"), 
            x=-Inf, y=0, size=3, label=TeX("$\\leftarrow$ More D. brand signal"), 
            hjust=-0.15, vjust=-1, inherit.aes=F) +
  scale_color_identity() +
  theme_custom_vertpanel
ggsave_v(p.ideal.other.consumers, filename = "figures/reg/other_cons.pdf",
         height = 5, width = 8)
##
## Coefficient plot for the employee covariates, one facet per variable type,
## saved to figures/reg/other_empl.pdf. Same construction as the consumer plot:
## `var_type` / `var_val` are the 2nd / 3rd "."-separated pieces of the
## predictor name.
p.ideal.other.empl <- results.ideal.other.empl %>%
  ## drop net-opinion predictors and any predictor whose name contains "Male"
  filter(is.na(se.clus), !grepl("(net_opin|Male)", x)) %>%
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "grey50",
                           TRUE ~ "gray")) %>%  
  ## rowwise so that strsplit(...)[[1]] is evaluated on one name at a time
  rowwise() %>%
  mutate(var_type = strsplit(x, split="\\.")[[1]][2],
         var_val = strsplit(x, split="\\.")[[1]][3]) %>%  
  ## NOTE(review): the pct_* relabelling below mirrors the consumer plot; the
  ## employee specification only selects `zippia_empl*` columns, so these
  ## branches look unreachable here -- confirm.
  mutate(var_val = gsub("_"," ",var_val),
         var_val = gsub("\\(.*","",var_val),
         var_val = case_when(
           grepl("pct_recog", x, ignore.case=T) ~ "Recognition",
           grepl("pct_pos", x, ignore.case=T) ~ "Positive Opinion",
           grepl("pct_neg", x, ignore.case=T) ~ "Negative Opinion",
           TRUE ~ var_val
         ),
         var_type = case_when(
           grepl("Age", var_type, ignore.case=T) ~ "Age",
           grepl("Gender", var_type, ignore.case=T) ~ "Gender",
           grepl("Degree", var_type, ignore.case=T) ~ "Educ. Status",
           TRUE ~ var_type
         ),
         var_type = paste0("% ", stringr::str_to_title(var_type))) %>%
  ## group_by() replaces the rowwise grouping; rows are then sorted by
  ## descending estimate before var_val is turned into a factor
  group_by(var_type) %>%
  arrange(-estimate) %>%
  mutate(var_val = as_factor(var_val)) %>%  
  ggplot(aes(y=var_val, x=estimate, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), size=1) + 
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), size=2) + 
  geom_point(size=3) +
  facet_grid(var_type ~ ., scales = "free", space = "free_y") +
  ylab("") +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  ## direction hint, drawn only in the "% Gender" panel
  geom_text(data = data.frame(var_type = "% Gender"), 
            x=-Inf, y=0, size=3, label=TeX("$\\leftarrow$ More D. brand signal"), 
            hjust=-0.15, vjust=-1, inherit.aes=F) +
  scale_color_identity() +
  theme_custom_vertpanel
ggsave_v(p.ideal.other.empl, filename = "figures/reg/other_empl.pdf",
         height = 4.5, width = 8)
##
## Coefficient plot for the firm-level covariates (single panel), saved to
## figures/reg/other_firm.pdf. Rows are ordered by descending estimate.
p.ideal.other.firm <- results.ideal.other.firm %>%
  filter(is.na(se.clus), !grepl("(net_opin|Male)", x)) %>%
  ## black: below adjusted threshold; gray25: below global alpha only
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  rowwise() %>%
  mutate(x.name = sanitize_var(x, add_category = FALSE)) %>%
  # group_by(x.name) %>%
  arrange(-estimate) %>%
  mutate(x.name = as_factor(x.name)) %>%
  ggplot(aes(y = x.name, x = estimate, color = color)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(aes(xmin = estimate - p.value.adj.tcrit * std.error,
                     xmax = estimate + p.value.adj.tcrit * std.error), size = 1) +
  geom_linerange(aes(xmin = estimate - 1.96 * std.error,
                     xmax = estimate + 1.96 * std.error), size = 2) +
  geom_point(size = 3) +
  ylab("") +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  ## annotate() layers never inherit plot aesthetics and have no `inherit.aes`
  ## argument; passing one only produced an "unknown parameters" warning.
  annotate("text",
           x = -Inf, y = 0, size = 3,
           label = TeX("$\\leftarrow$ More D. brand signal"),
           hjust = 0, vjust = -0.5) +
  scale_color_identity() +
  theme_custom
ggsave_v(p.ideal.other.firm, filename = "figures/reg/other_firm.pdf",
         height = 2.5, width = 8)

## 7.9. Keywords ~ Stakeholders/Activities -------------------------------

## Interactive check: list the keyword-count columns available in d.brands.
colnames(d.brands)[grepl("kwd", colnames(d.brands))]

## Race-related keyword outcomes regressed on workforce-composition, rating and
## discrimination-offense predictors. Outcomes are left unscaled and fitted
## with MASS::glm.nb; predictors are standardized.
specifs.kwds.race <- build_specifs(
  data = d.brands,
  x.vars = c(
    "zippia_empl.Ethnicity.Non-White",
    "zippia_empl.Ethnicity.Black Or African American",
    "gd.Race / Ethnicity.Black or African American.avg_rating",
    "gjf.n_off.discr",
    "glassdoor_Rating.Diversity and Inclusion"
  ),
  y.vars = c(
    "kwd.Racial.black",
    "kwd.Nonwhite",
    "kwd.Racial Justice"
  ),
  se.cluster.vars = NA,
  scale.y = FALSE,
  scale.x = TRUE
)
results.kwds.race <- measure_alignments_robustly(
  ## Alternatives tried earlier: unfiltered d.brands, or log-transformed
  ## keyword counts with lm.func = lm.
  data        = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs  = specifs.kwds.race,
  lm.func     = MASS::glm.nb,
  debug       = FALSE,
  quad.stats  = FALSE,
  parallelize = FALSE,
  equiv.test  = FALSE,
  zaminfl     = FALSE
)
##
## LGBTQ keyword outcome regressed on LGBTQ+/HRC/D&I ratings and
## discrimination offenses. Outcome unscaled, fitted with MASS::glm.nb;
## predictors standardized.
specifs.kwds.lgbtq <- build_specifs(
  data = d.brands,
  x.vars = c(
    "gd.Sexual Orientation.LGBTQ+.avg_rating",
    "hrc_rating",
    "glassdoor_Rating.Diversity and Inclusion",
    "gjf.n_off.discr"
  ),
  y.vars = c("kwd.LGBTQ"),
  se.cluster.vars = NA,
  scale.y = FALSE,
  scale.x = TRUE
)
results.kwds.lgbtq <- measure_alignments_robustly(
  ## Alternatives tried earlier: unfiltered d.brands, or log-transformed
  ## keyword counts with lm.func = lm.
  data        = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs  = specifs.kwds.lgbtq,
  lm.func     = MASS::glm.nb,
  debug       = FALSE,
  quad.stats  = FALSE,
  parallelize = FALSE,
  equiv.test  = FALSE,
  zaminfl     = FALSE
)
##
## Women keyword outcome regressed on gender-related ratings, workforce and
## audience shares, and discrimination offenses. Outcome unscaled, fitted with
## MASS::glm.nb; predictors standardized.
specifs.kwds.women <- build_specifs(
  data = d.brands,
  x.vars = c(
    "gd.Gender.Women.avg_rating",
    "zippia_empl.Genders.Female",
    "yougov_aud.gender.Female",
    "glassdoor_Rating.Diversity and Inclusion",
    "gjf.n_off.discr"
  ),
  y.vars = c("kwd.Women"),
  se.cluster.vars = NA,
  scale.y = FALSE,
  scale.x = TRUE
)
results.kwds.women <- measure_alignments_robustly(
  ## Alternatives tried earlier: unfiltered d.brands, or log-transformed
  ## keyword counts with lm.func = lm.
  data        = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs  = specifs.kwds.women,
  lm.func     = MASS::glm.nb,
  debug       = FALSE,
  quad.stats  = FALSE,
  parallelize = FALSE,
  equiv.test  = FALSE,
  zaminfl     = FALSE
)
##
## Climate/environment keyword outcome regressed on climate disclosure and
## policy scores and environmental offenses. Outcome unscaled, fitted with
## MASS::glm.nb; predictors standardized.
specifs.kwds.climate <- build_specifs(
  data = d.brands,
  x.vars = c(
    "cdp_avg_score",
    colnames(d.brands)[grepl("clm100_discl.Sub-indicator (5|6|7|8).*Mar.*21", colnames(d.brands))],
    # "clm100_discl.7.1 Metric assessment.a.Mar-22", # Brand commits to conduct all lobbying\nin line with Paris Agreement?
    "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
    "gjf.n_off.environ"
  ),
  y.vars = c("kwd.Climate/Environment"),
  se.cluster.vars = NA,
  scale.y = FALSE,
  scale.x = TRUE
)
results.kwds.climate <- measure_alignments_robustly(
  ## Alternatives tried earlier: unfiltered d.brands, or log-transformed
  ## keyword counts with lm.func = lm.
  data        = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs  = specifs.kwds.climate,
  lm.func     = MASS::glm.nb,
  debug       = FALSE,
  quad.stats  = FALSE,
  parallelize = FALSE,
  equiv.test  = FALSE,
  zaminfl     = FALSE
)

### Visualise regression estimates ----

## Shared builder for the four keyword coefficient plots below (they were
## previously four copies of the same pipeline).
##
## results:    rows of a keyword regression result table, already filtered to
##             the rows that should be drawn.
## facet.labs: named character vector mapping outcome names (`y`) to the
##             facet strip labels.
## Returns a ggplot with one facet column per outcome. Predictors are ordered
## by estimate; thin bars use `p.value.adj.tcrit`, thick bars use 1.96.
.plot_kwd_estimates <- function(results, facet.labs) {
  results %>%
    mutate(x.name = sanitize_var(x, add_category = FALSE)) %>%
    ## sort before as_factor() so the y-axis follows the estimates
    arrange(estimate) %>%
    mutate(x.name = as_factor(x.name)) %>%
    ## black: below adjusted threshold; gray25: below global alpha only
    mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                             p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                             TRUE ~ "gray")) %>%
    ggplot(aes(y = x.name, x = estimate, color = color)) +
    geom_vline(xintercept = 0, lty = 2) +
    geom_linerange(aes(xmin = estimate - p.value.adj.tcrit * std.error,
                       xmax = estimate + p.value.adj.tcrit * std.error), size = 1) +
    geom_linerange(aes(xmin = estimate - 1.96 * std.error,
                       xmax = estimate + 1.96 * std.error), size = 2) +
    geom_point(size = 3) +
    facet_grid(~ y, scales = "free", labeller = labeller(y = facet.labs)) +
    xlab("Standardized negative binomial coefficient estimate") +
    ylab("") +
    scale_color_identity() +
    theme_custom
}

p.kwds.race <- results.kwds.race %>%
  filter(is.na(se.clus)) %>%
  .plot_kwd_estimates(c(
    `kwd.Racial.black` = '"Black"',
    `kwd.Nonwhite` = '"Race"',
    `kwd.Racial Justice` = '"Racial Justice"'
  ))

p.kwds.lgbtq <- results.kwds.lgbtq %>%
  filter(is.na(se.clus)) %>%
  .plot_kwd_estimates(c(`kwd.LGBTQ` = '"LGBTQ"'))

p.kwds.women <- results.kwds.women %>%
  filter(is.na(se.clus)) %>%
  .plot_kwd_estimates(c(`kwd.Women` = '"Women"'))

## The climate plot additionally drops rows without an estimate and any
## disclosure sub-indicators 1-4, 8 and 9.
p.kwds.climate <- results.kwds.climate %>%
  filter(is.na(se.clus), !is.na(estimate),
         !grepl("clm100_discl.Sub-indicator (1|2|3|4|8|9)", x)) %>%
  .plot_kwd_estimates(c(`kwd.Climate/Environment` = '"Climate/Environment"'))

## Stack the four keyword plots in one column on a common x-range; only the
## bottom plot keeps an x-axis title (hence its larger relative height).
## Saved to figures/reg/kwds_cmbd.pdf.
p.kwds.cmbd <- cowplot::plot_grid(
  p.kwds.race + scale_x_continuous(limits = c(-3.5, 3.5), name = ""),
  p.kwds.lgbtq + scale_x_continuous(limits = c(-3.5, 3.5), name = ""),
  p.kwds.women + scale_x_continuous(limits = c(-3.5, 3.5), name = ""),
  p.kwds.climate + scale_x_continuous(limits = c(-3.5, 3.5), name = "Standardized negative binomial coefficient estimate"),
  ncol = 1,
  ## Plots stacked in a column need vertical alignment ("v"); "h" has nothing
  ## to align when each row holds a single plot. `axis = "lr"` aligns the
  ## left/right margins even though the plots have different facet counts.
  align = "v", axis = "lr",
  rel_heights = c(1, 1, 1, 1.3)
)
ggsave_v(p.kwds.cmbd,
         filename = "figures/reg/kwds_cmbd.pdf",
         width = 10, height = 7)

## 7.10. Alternative Measures ~ Stakeholders/Activities -------------------------------

## Robustness check: seven alternative ideal-point measures, each regressed on
## the stakeholder predictors. Fitted in parallel on 12 cores with quadrant
## statistics, equivalence tests and influence statistics switched off.
specifs.ideal.stkhl.alt <- build_specifs(
  data = d.brands,
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP", "zi.pres.REP",
    "cong_house_dw_mean", "cong_sen_dw_mean",
    "stkhl.R"
  ),
  y.vars = c(
    "ideal.main", "ideal.main.bin", "ideal.groups", "ideal.issues",
    "ideal.mdl", "ideal.tw", "ideal.ig"
  ),
  x.mids = d.brands.quad.mids,
  y.mids = d.brands.quad.mids,
  se.cluster.vars = c(),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.stkhl.alt <- measure_alignments_robustly(
  data                = d.brands,
  lm.specifs          = specifs.ideal.stkhl.alt,
  lm.func             = lm,
  debug               = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize         = TRUE,
  quad.stats          = FALSE,
  equiv.test          = FALSE,
  zaminfl             = FALSE,
  parallel.cores      = 12
)
##
## Robustness check: seven alternative ideal-point measures, each regressed on
## a reduced set of activity predictors. Fitted in parallel on 12 cores with
## quadrant statistics, equivalence tests and influence statistics switched off.
specifs.ideal.actv.alt <- build_specifs(
  data = d.brands,
  x.vars = c(
    #"legis.R_frac",
    "opsec.R_cand_share.org_dollars",
    "hrc_rating",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
    "gjf.n_off.discr",
    "gjf.n_off.environ"
  ),
  y.vars = c(
    "ideal.main", "ideal.main.bin", "ideal.groups", "ideal.issues",
    "ideal.mdl", "ideal.tw", "ideal.ig"
  ),
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = d.brands.quad.mids,
  se.cluster.vars = NA,
  #subset.vars = c("num_empl.2"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.actv.alt <- measure_alignments_robustly(
  data                = d.brands,
  lm.specifs          = specifs.ideal.actv.alt,
  lm.func             = lm,
  debug               = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize         = TRUE,
  quad.stats          = FALSE,
  equiv.test          = FALSE,
  zaminfl             = FALSE,
  parallel.cores      = 12
)

### Visualise regression estimates ----
## Lookup table from outcome variable name to its display label
## (a named character vector: names are variables, values are labels).
type.labs <- c(
  ideal.main     = "Main",
  ideal.main.bin = "Binarized",
  ideal.groups   = "Groups Only",
  ideal.issues   = "Issues Only",
  ideal.mdl      = "Parametric",
  ideal.tw       = "Twitter Only",
  ideal.ig       = "Instagram Only"
)


## Coefficient plot for the alternative-measure robustness check: one panel per
## stakeholder predictor, one row per ideal-point measure, with the "Main"
## measure drawn with a distinct point shape. Saved to
## figures/reg/stkhl_alt.pdf.
p.ideal.stkhl.alt <- results.ideal.stkhl.alt %>%
  filter(is.na(se.clus), !is.na(estimate), subset.var.val == "all") %>%
  mutate(x.name = sanitize_var(x, add_category = FALSE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = TRUE)) %>%
  mutate(y = factor(y, levels = rev(c("ideal.main",
                                      "ideal.main.bin",
                                      "ideal.groups",
                                      "ideal.issues",
                                      "ideal.mdl",
                                      "ideal.tw",
                                      "ideal.ig")))) %>%
  rowwise() %>%
  ## `y` is a factor at this point, and indexing with a factor uses its integer
  ## codes rather than its labels. With the reversed level order above that
  ## mirrored the labels (e.g. ideal.main was labelled "Instagram Only"), so
  ## the lookup must go through the level name.
  mutate(y.name = type.labs[[as.character(y)]]) %>%
  ## NOTE(review): with correct labels, "Main" becomes the first level of
  ## y.name, which should put it at the bottom of the y-axis; switch to
  ## arrange(y) if it should sit at the top.
  arrange(desc(y)) %>%
  mutate(y.name = as_factor(y.name)) %>%
  arrange(x) %>%
  ## fixed panel order
  mutate(x = factor(x, levels = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R"
  ))) %>%
  ## black: below adjusted threshold; gray25: below global alpha only
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  mutate(shape = ifelse(y.name == "Main", 23, 16)) %>%
  ggplot(aes(y = y.name, x = estimate, color = color, shape = shape)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(aes(xmin = estimate - p.value.adj.tcrit * std.error,
                     xmax = estimate + p.value.adj.tcrit * std.error),
                 size = 1, position = position_dodge(width = 0.8)) +
  geom_linerange(aes(xmin = estimate - 1.96 * std.error,
                     xmax = estimate + 1.96 * std.error),
                 size = 2, position = position_dodge(width = 0.8)) +
  geom_point(fill = "white", size = 3, position = position_dodge(width = 0.8)) +
  facet_grid(x ~ ., scales = "free", space = "free",
             labeller = labeller(
               x = c(
                 `R_don_share` = "All Employees + Board\n(% R. Donations)",
                 `twitter.foll_ideo_slant` = "Twitter Followers\n(R. Direction)",
                 `hq_pres.REP` = "Voters in HQ ZIP Codes\n(% R. Pres Vote)",
                 `zi.pres.REP` = "Voters in Retail ZIP Codes\n(Zippia)",
                 `cong_house_dw_mean` = "Ideology of\nHQ House Rep",
                 `cong_sen_dw_mean` = "Ideology of\nHQ Senator",
                 `stkhl.R` = "All Stakeholders\n(% R)"
               )
             )) +
  scale_x_continuous(name = TeX("Coefficient estimate"),
                     limits = c(-1, 1)) +
  scale_y_discrete(name = "Brand signal measures") +
  # geom_text(data = data.frame(x.name = "% R. Legislators Lobbied"),
  #           x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
  #           hjust=1, vjust=-1, inherit.aes=F)  +
  scale_color_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(legend.position = "none",
        strip.text.y = element_text(angle = 0))
ggsave_v(p.ideal.stkhl.alt,
         filename = "figures/reg/stkhl_alt.pdf",
         width = 10, height = 8)

## Coefficient plot for the alternative brand-signal measures (rows) against
## each activity measure (facets), using full-sample ("all") estimates only.
## Thin bars use the adjusted critical value; thick bars use +/- 1.96 SE.
p.ideal.actv.alt <- results.ideal.actv.alt %>%
  filter(is.na(se.clus), !is.na(estimate), subset.var.val == "all") %>%
  mutate(x.name = sanitize_var(x, add_category = FALSE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = TRUE)) %>%
  mutate(y = factor(y, levels = rev(c("ideal.main",
                                      "ideal.main.bin",
                                      "ideal.groups",
                                      "ideal.issues",
                                      "ideal.mdl",
                                      "ideal.tw",
                                      "ideal.ig")))) %>%
  ## Look the label up by *name*. `y` is a factor here, and indexing with a
  ## factor uses its integer codes rather than its labels; because the levels
  ## are reversed, `type.labs[[y]]` returned the mirrored label (e.g.
  ## "Instagram Only" for "ideal.main").
  mutate(y.name = unname(type.labs[as.character(y)])) %>%
  arrange(desc(y)) %>%
  ## as_factor() keeps order of appearance, so the row order follows `y`.
  mutate(y.name = as_factor(y.name)) %>%
  ## Facets are ordered by first appearance after sorting on the estimate.
  arrange(estimate) %>%
  mutate(x.name = as_factor(x.name)) %>%
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ## The main measure is drawn with shape 23, every other measure with 16.
  mutate(shape = ifelse(y.name == "Main", 23, 16)) %>%
  ggplot(aes(y=y.name, x=estimate, color=color, shape=shape)) +
  geom_vline(xintercept=0, lty=2) +
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
                 size=1, position = position_dodge(width=0.8)) +
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error),
                 size=2, position = position_dodge(width=0.8)) +
  geom_point(fill="white", size=3, position = position_dodge(width=0.8)) +
  facet_grid(x.name ~ ., scales = "free", space = "free") +
  scale_x_continuous(name = TeX("Coefficient estimate"),
                     limits = c(-1, 1)) +
  scale_y_discrete(name = "Brand signal measures") +
  ## Direction annotation, drawn only in the facet named in `data`.
  geom_text(data = data.frame(x.name = "% PAC $ on R. Candidates"),
            x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
            hjust=1, vjust=-1, inherit.aes=F)  +
  scale_color_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(legend.position="none",
        strip.text.y = element_text(angle=0))
ggsave_v(p.ideal.actv.alt,
         filename = "figures/reg/actv_alt.pdf",
         width=10, height=8)

# 8.) MEASURE ALIGNMENT ROBUSTLY OVER TIME ------------------------------------

## Within each brand, attach the previous-row (`ideal.main.lag`) and next-row
## (`ideal.main.lead`) value of `ideal.main`, with rows ordered by year.
## NOTE(review): lag()/lead() shift by row, so this presumably assumes one row
## per brand for every consecutive year -- confirm there are no gaps.
d.brands.y <- d.brands.y %>%
  mutate(year = as.character(year)) %>%
  group_by(yougov_name, yougov_id) %>%
  mutate(
    ideal.main.lag = lag(ideal.main, order_by = year),
    ideal.main.lead = lead(ideal.main, order_by = year)
  ) %>%
  ungroup()

## 8.1. Ideal Points ~ Stakeholders --------------------------------------------

## Specifications: `ideal.main` on each stakeholder measure, subset by year.
specifs.ideal.stkhl.y <- build_specifs(
  data = d.brands.y %>% mutate(year = as.character(year)),
  x.vars = c(
    ## every donation-share column (overall and per employee group)
    grep("^R_don_share", colnames(d.brands), value = TRUE),
    "cong_house_dw_mean", "cong_sen_dw_mean",
    # "twitter.foll_ideo_slant",
    "sl.Rep_Pct",
    "hq_pres.REP", "zi.pres.REP", "sg.pres.REP",
    "stkhl.R"
  ),
  y.vars = "ideal.main",
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = list("ideal.main" = 0),
  se.cluster.vars = NULL,
  subset.vars = "year",
  scale.y = TRUE,
  scale.x = TRUE
)

## Fit every specification; p-values are adjusted within each (y, x) group.
results.ideal.stkhl.y <- measure_alignments_robustly(
  data = d.brands.y.l,
  lm.specifs = specifs.ideal.stkhl.y,
  lm.func = lm,
  debug = FALSE,
  p.adjust = TRUE,
  p.adjust.group.vars = c("y", "x"),
  parallelize = TRUE,
  equiv.test = FALSE,
  parallel.cores = 12
)

### additional specs to measure lag/lead/current effects pooled across years
## Three copies of the yearly specifications with the subset columns blanked
## (so rows collapse to one per x after distinct()) and year fixed effects
## added: one for the contemporaneous DV, one for the lagged DV and one for
## the lead DV.
specifs.ideal.stkhl.y2 <- bind_rows(
  ## contemporaneous DV
  specifs.ideal.stkhl.y %>%
    mutate_at(vars(matches("subset")), ~NA) %>%
    mutate(formula = paste(formula, "+ as.factor(year)")) %>%
    distinct(),
  ## lagged DV
  specifs.ideal.stkhl.y %>%
    mutate_at(vars(matches("subset")), ~NA) %>%
    distinct() %>%
    mutate(y = "ideal.main.lag",
           formula = gsub("ideal.main", "ideal.main.lag", formula, fixed = TRUE) %>%
             paste(., "+ as.factor(year)")),
  ## lead DV: the formula must use `ideal.main.lead`. It previously substituted
  ## `ideal.main.lag`, so the "lead" rows silently re-estimated the lag model.
  specifs.ideal.stkhl.y %>%
    mutate_at(vars(matches("subset")), ~NA) %>%
    distinct() %>%
    mutate(y = "ideal.main.lead",
           formula = gsub("ideal.main", "ideal.main.lead", formula, fixed = TRUE) %>%
             paste(., "+ as.factor(year)"))
)
results.ideal.stkhl.y2 <- measure_alignments_robustly(
  data = d.brands.y,
  lm.specifs = specifs.ideal.stkhl.y2,
  lm.func = lm,
  debug = FALSE,
  p.adjust = TRUE,
  p.adjust.group.vars = c("y", "x"),
  parallelize = TRUE,
  zaminfl = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  parallel.cores = 12
)

### 8.1.1: yearly estimates ----

## One panel per stakeholder measure, showing the per-year estimate with an
## adjusted-critical-value interval (thin) and a +/- 1.96 SE interval (thick).
p.ideal.stkhl.y <- results.ideal.stkhl.y %>%
  filter(
    subset.var == "year",
    is.na(se.clus),
    !is.na(estimate),
    subset.var.val != "all"
  ) %>%
  # mutate(x.name = sanitize_var(x, add_category = F, multi_line = T)) %>%
  # mutate(color = ifelse(p.value < 0.05, "black", "gray")) %>%
  mutate(
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
      TRUE ~ "gray"
    ),
    ## factor levels fix the panel order
    x = factor(x, levels = c(
      "R_don_share",
      "R_don_share.Top_Exec",
      "R_don_share.Managers",
      "R_don_share.Board_Member",
      "R_don_share.Rank_and_File",
      "R_don_share.Legal",
      "R_don_share.Marketing",
      "R_don_share.Public_Relations",
      "R_don_share.Human_Resources",
      "sl.Rep_Pct",
      "hq_pres.REP",
      "sg.pres.REP",
      "zi.pres.REP",
      "cong_house_dw_mean",
      "cong_sen_dw_mean",
      "stkhl.R"
    ))
  ) %>%
  ggplot(aes(y = estimate, x = as.numeric(subset.var.val), color = color)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_linerange(
    aes(ymin = estimate - p.value.adj.tcrit * std.error,
        ymax = estimate + p.value.adj.tcrit * std.error),
    size = 1
  ) +
  geom_linerange(
    aes(ymin = estimate - 1.96 * std.error,
        ymax = estimate + 1.96 * std.error),
    size = 2
  ) +
  geom_point(fill = "white", size = 3) +
  facet_wrap(
    x ~ .,
    scales = "free",
    labeller = labeller(
      x = c(
        `R_don_share` = "All Employees + Board\n(% R. Donations)",
        `R_don_share.Top_Exec` = "Executives",
        `R_don_share.Managers` = "Managers",
        `R_don_share.Board_Member` = "Board Members",
        `R_don_share.Rank_and_File` = "Rank and File\nEmployees",
        `R_don_share.Legal` = "Legal\nEmployees",
        `R_don_share.Marketing` = "Marketing\nEmployees",
        `R_don_share.Public_Relations` = "Public Relations\nEmployees",
        `R_don_share.Human_Resources` = "Human Resources\nEmployees",
        `sl.Rep_Pct` = "Twitter Followers\n(% R.)",
        `hq_pres.REP` = "Voters in HQ ZIP Codes\n(% R. Pres Vote)",
        `sg.pres.REP` = "Voters in Retail ZIP Codes\n(SafeGraph)",
        `zi.pres.REP` = "Voters in Retail ZIP Codes\n(Zippia)",
        `cong_house_dw_mean` = "Ideology of\nHQ House Rep",
        `cong_sen_dw_mean` = "Ideology of\nHQ Senator",
        `stkhl.R` = "All Stakeholders\n(% R)"
      )
    )
  ) +
  scale_color_identity() +
  ## x-axis breaks on even study years only
  scale_x_continuous(breaks = STUDY_YEARS[STUDY_YEARS %% 2 == 0], name = "") +
  scale_y_continuous(name = "Correlation with Republican brand signals") +
  # scale_shape_manual(values = 21:24) +
  theme_custom +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
ggsave_v(
  p.ideal.stkhl.y,
  filename = "figures/reg/overtime_stkhl.pdf",
  width = 13, height = 8
)

### 8.1.2: current, lag, lead effects ----

## One facet per stakeholder measure, one row per DV timing (lead /
## contemporary / lag); year fixed-effect terms are dropped before plotting.
## NOTE(review): `ideal.main.lead` is labelled "y-1 (lead)" and
## `ideal.main.lag` is labelled "y+1 (lag)" -- confirm these labels match how
## the lag/lead columns were constructed.
p.ideal.stkhl.y2 <- results.ideal.stkhl.y2 %>%
  filter(!grepl("year", term)) %>%
  mutate(
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
      TRUE ~ "gray"
    ),
    y = case_when(
      y == "ideal.main.lead" ~ "y-1 (lead)",
      y == "ideal.main.lag" ~ "y+1 (lag)",
      y == "ideal.main" ~ "y (contemporary)"
    ) %>%
      factor(., levels = c("y-1 (lead)", "y (contemporary)", "y+1 (lag)")),
    ## factor levels fix the facet order
    x = factor(x, levels = c(
      "R_don_share",
      "R_don_share.Top_Exec",
      "R_don_share.Managers",
      "R_don_share.Board_Member",
      "R_don_share.Rank_and_File",
      "R_don_share.Legal",
      "R_don_share.Marketing",
      "R_don_share.Public_Relations",
      "R_don_share.Human_Resources",
      "sl.Rep_Pct",
      "hq_pres.REP",
      "sg.pres.REP",
      "zi.pres.REP",
      "cong_house_dw_mean",
      "cong_sen_dw_mean",
      "stkhl.R"
    ))
  ) %>%
  ggplot(aes(x = estimate, y = y, color = color)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(
    aes(xmin = estimate - p.value.adj.tcrit * std.error,
        xmax = estimate + p.value.adj.tcrit * std.error),
    size = 1
  ) +
  geom_linerange(
    aes(xmin = estimate - 1.96 * std.error,
        xmax = estimate + 1.96 * std.error),
    size = 2
  ) +
  geom_point(fill = "white", size = 3) +
  facet_grid(
    x ~ .,
    scales = "free",
    labeller = labeller(
      x = c(
        `R_don_share` = "All Employees + Board\n(% R. Donations)",
        `R_don_share.Top_Exec` = "Executives",
        `R_don_share.Managers` = "Managers",
        `R_don_share.Board_Member` = "Board Members",
        `R_don_share.Rank_and_File` = "Rank and File\nEmployees",
        `R_don_share.Legal` = "Legal\nEmployees",
        `R_don_share.Marketing` = "Marketing\nEmployees",
        `R_don_share.Public_Relations` = "Public Relations\nEmployees",
        `R_don_share.Human_Resources` = "Human Resources\nEmployees",
        `sl.Rep_Pct` = "Twitter Followers\n(% R.)",
        `hq_pres.REP` = "Voters in HQ ZIP Codes\n(% R. Pres Vote)",
        `sg.pres.REP` = "Voters in Retail ZIP Codes\n(SafeGraph)",
        `zi.pres.REP` = "Voters in Retail ZIP Codes\n(Zippia)",
        `cong_house_dw_mean` = "Ideology of\nHQ House Rep",
        `cong_sen_dw_mean` = "Ideology of\nHQ Senator",
        `stkhl.R` = "All Stakeholders\n(% R)"
      )
    )
  ) +
  scale_color_identity() +
  scale_y_discrete(name = "Lag/Lead of Partisan Brand Signal DV (in Years)") +
  scale_x_continuous(name = "Correlation with Republican Political Signals") +
  theme_custom +
  theme(
    legend.position = "none",
    strip.text.y = element_text(angle = 0)
  )
ggsave_v(
  p.ideal.stkhl.y2,
  filename = "figures/reg/overtime_stkhl_laglead.pdf",
  width = 8, height = 9
)

## 8.2. Ideal Points ~ Activities ----------------------------------------

## Specifications: `ideal.main` on each activity measure, subset by year.
specifs.ideal.actv.y <- build_specifs(
  data = d.brands.y %>% mutate(year = as.character(year)),
  x.vars = c(
    "legis.R_frac",
    "opsec.R_share.org_dollars",
    "opsec.R_cand_share.org_dollars",
    # "yougov_aud.gender.Female",
    # "zippia_empl.Ethnicity.Non-White",
    "hrc_rating",
    # "gd.Race / Ethnicity.Black or African American.avg_rating",
    # "glassdoor_Rating.Diversity and Inclusion",
    # "gd.Gender.Women.avg_rating",
    # "gd.Sexual Orientation.LGBTQ+.avg_rating",
    "cdp_avg_score",
    # colnames(d.brands)[grepl("clm100_discl.Sub-indicator",colnames(d.brands))],
    # "clm100_discl.7.1 Metric assessment.a.Mar-22", # Brand commits to conduct all lobbying\nin line with Paris Agreement?
    # "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
    "gjf.n_off.discr",
    "gjf.n_off.labor",
    "gjf.n_off.environ"
  ),
  y.vars = "ideal.main",
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = list("ideal.main" = 0),
  se.cluster.vars = NA,
  subset.vars = "year",
  scale.y = TRUE,
  scale.x = TRUE
)

## Fit every specification serially; p-values are adjusted within each
## (y, x) group.
results.ideal.actv.y <- measure_alignments_robustly(
  data = d.brands.y.l,
  lm.specifs = specifs.ideal.actv.y,
  lm.func = lm,
  debug = FALSE,
  p.adjust = TRUE,
  p.adjust.group.vars = c("y", "x"),
  quad.stats = FALSE,
  parallelize = FALSE,
  equiv.test = TRUE
)

### additional specs to measure lag/lead/current effects pooled across years
## Three copies of the yearly specifications with the subset columns blanked
## (so rows collapse to one per x after distinct()): one for the
## contemporaneous DV, one for the lagged DV and one for the lead DV.
## NOTE(review): unlike the stakeholder version in 8.1, no `as.factor(year)`
## term is appended here -- confirm that is intended.
specifs.ideal.actv.y2 <- bind_rows(
  ## contemporaneous DV
  specifs.ideal.actv.y %>%
    mutate_at(vars(matches("subset")), ~NA) %>%
    distinct(),
  ## lagged DV
  specifs.ideal.actv.y %>%
    mutate_at(vars(matches("subset")), ~NA) %>%
    distinct() %>%
    mutate(y = "ideal.main.lag",
           formula = gsub("ideal.main", "ideal.main.lag", formula, fixed = TRUE)),
  ## lead DV: the formula must use `ideal.main.lead`. It previously substituted
  ## `ideal.main.lag`, so the "lead" rows silently re-estimated the lag model.
  specifs.ideal.actv.y %>%
    mutate_at(vars(matches("subset")), ~NA) %>%
    distinct() %>%
    mutate(y = "ideal.main.lead",
           formula = gsub("ideal.main", "ideal.main.lead", formula, fixed = TRUE))
)
results.ideal.actv.y2 <- measure_alignments_robustly(
  data = d.brands.y,
  lm.specifs = specifs.ideal.actv.y2,
  lm.func = lm,
  debug = FALSE,
  p.adjust = TRUE,
  p.adjust.group.vars = c("y", "x"),
  parallelize = TRUE,
  zaminfl = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  parallel.cores = 12
)

### 8.2.1: yearly estimates ----

## One panel per activity measure, showing the per-year estimate with an
## adjusted-critical-value interval (thin) and a +/- 1.96 SE interval (thick).
p.ideal.actv.y <- results.ideal.actv.y %>%
  filter(
    is.na(se.clus),
    !is.na(estimate),
    subset.var.val != "all",
    !grepl("clm100_discl.Sub-indicator (1|2|3|4|8|9)", x)
  ) %>%
  # mutate(color = ifelse(p.value < 0.05, "black", "gray")) %>%
  mutate(
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
      TRUE ~ "gray"
    ),
    ## panels in alphabetical order of the variable name
    x = factor(x, levels = sort(unique(results.ideal.actv.y$x)))
  ) %>%
  ggplot(aes(y = estimate, x = as.numeric(subset.var.val), color = color)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_linerange(
    aes(ymin = estimate - p.value.adj.tcrit * std.error,
        ymax = estimate + p.value.adj.tcrit * std.error),
    size = 1
  ) +
  geom_linerange(
    aes(ymin = estimate - 1.96 * std.error,
        ymax = estimate + 1.96 * std.error),
    size = 2
  ) +
  geom_point(fill = "white", size = 3) +
  facet_wrap(
    x ~ .,
    scales = "free",
    labeller = labeller(
      x = c(
        `cdp_avg_score` = "Disclosure and Action Score (CDP)",
        `gjf.n_off.discr` = "Number of Discrimination\nOffenses",
        `gjf.n_off.environ` = "Number of Environmental\nViolations",
        `gjf.n_off.labor` = "Number of Labor\nViolations",
        `hrc_rating` = "LGBTQ+ Equality Score (HRC)",
        `legis.R_frac` = "% R. Legislators Lobbied",
        `opsec.R_cand_share.org_dollars` = "% PAC $ on R. Candidates",
        `opsec.R_share.org_dollars` = "% PAC $ on R. Groups"
      )
    )
  ) +
  scale_color_identity() +
  ## x-axis breaks on even study years only
  scale_x_continuous(breaks = STUDY_YEARS[STUDY_YEARS %% 2 == 0], name = "") +
  scale_y_continuous(name = "Correlation with Republican Political Signals") +
  # scale_shape_manual(values = 21:24) +
  theme_custom +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
ggsave_v(
  p.ideal.actv.y,
  filename = "figures/reg/overtime_actv.pdf",
  width = 13, height = 6
)

### 8.2.2: current, lag, lead effects ----

## One facet per activity measure, one row per DV timing (lead /
## contemporary / lag).
p.ideal.actv.y2 <- results.ideal.actv.y2 %>%
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ## The lag row was labelled "y+1 (lead)", so two rows both read "(lead)".
  ## It now uses "y+1 (lag)", matching the stakeholder figure in 8.1.2.
  ## NOTE(review): confirm the y-1 / y+1 direction against how the lag/lead
  ## columns were constructed.
  mutate(y = case_when(
    y == "ideal.main.lead" ~ "y-1 (lead)",
    y == "ideal.main.lag" ~ "y+1 (lag)",
    y == "ideal.main" ~ "y (contemporary)"
  ) %>% factor(., levels = c("y-1 (lead)", "y (contemporary)", "y+1 (lag)"))) %>%
  ## facets in alphabetical order of the variable name
  mutate(x = factor(x, levels = sort(unique(results.ideal.actv.y2$x)))) %>%
  ggplot(aes(x=estimate, y=y, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
                 size=1) +
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error),
                 size=2) +
  geom_point(fill="white", size=3) +
  facet_grid(x ~ ., scales = "free",
             labeller = labeller(
               x = c(
                 `cdp_avg_score` = "Disclosure and Action Score (CDP)",
                 `gjf.n_off.discr` = "Number of Discrimination\nOffenses",
                 `gjf.n_off.environ` = "Number of Environmental\nViolations",
                 `gjf.n_off.labor` = "Number of Labor\nViolations",
                 `hrc_rating` = "LGBTQ+ Equality Score (HRC)",
                 `legis.R_frac` = "% R. Legislators Lobbied",
                 `opsec.R_cand_share.org_dollars` = "% PAC $ on R. Candidates",
                 `opsec.R_share.org_dollars` = "% PAC $ on R. Groups"
               )
             )) +
  scale_color_identity() +
  scale_y_discrete(name = "Lag/Lead of Partisan Brand Signal DV (in Years)") +
  scale_x_continuous(name = "Correlation with Republican Political Signals") +
  theme_custom +
  theme(legend.position="none",
        strip.text.y = element_text(angle=0))
ggsave_v(p.ideal.actv.y2,
         filename = "figures/reg/overtime_actv_laglead.pdf",
         width=8, height=6)


# 9.) MEASURE ALIGNMENT ROBUSTLY PRE/POST -------------------------------------

## 9.1. Ideal Points ~ Stakeholders --------------------------------------------

## Specifications: `ideal.main` on each stakeholder measure, subset by period.
specifs.ideal.stkhl.pre.post <- build_specifs(
  ## Convert the subset variable itself to character, matching the data
  ## passed to measure_alignments_robustly() below. This previously created
  ## an unrelated `year` column from `period` (copied from section 8) and
  ## left `period` unconverted.
  data = d.brands.pre.post %>%
    mutate(period = as.character(period)),
  x.vars = c(colnames(d.brands)[grepl("^R_don_share",colnames(d.brands))],
             "sl.Rep_Pct",
             "hq_pres.REP",
             "zi.pres.REP",
             "sg.pres.REP",
             "cong_house_dw_mean",
             "cong_sen_dw_mean",
             "stkhl.R"),
  y.vars = c("ideal.main"),
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = list("ideal.main"=0),
  se.cluster.vars = c(),
  subset.vars = c("period"),
  scale.y = TRUE,
  scale.x = TRUE
)

## Fit every specification; no multiple-comparison adjustment is requested.
results.ideal.stkhl.pre.post <- measure_alignments_robustly(
  data = d.brands.pre.post %>%
    mutate(period = as.character(period)),
  lm.specifs = specifs.ideal.stkhl.pre.post,
  lm.func = lm,
  debug = FALSE,
  p.adjust = FALSE,
  parallelize = TRUE,
  equiv.test = TRUE,
  parallel.cores = 12
)

### Visualise regression estimates ----
## Per-period estimates, one panel per stakeholder measure. The plot is built
## at top level and is not assigned or saved. Estimates with unadjusted
## p < 0.05 are drawn in black.
results.ideal.stkhl.pre.post %>%
  filter(is.na(se.clus), subset.var.val != "all") %>%
  # mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  mutate(color = ifelse(p.value < 0.05, "black", "gray")) %>%
  ggplot(aes(y = estimate, x = subset.var.val, color = color)) +
  geom_hline(yintercept = 0, lty = 2) +
  # geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
  #                size=1, position = position_dodge(width=0.8)) +
  geom_linerange(
    aes(ymin = estimate - 1.96 * std.error,
        ymax = estimate + 1.96 * std.error),
    size = 2, position = position_dodge(width = 0.8)
  ) +
  geom_point(
    fill = "white", size = 3,
    position = position_dodge(width = 0.8)
  ) +
  facet_wrap(x ~ ., scales = "free") +
  scale_color_identity() +
  # scale_shape_manual(values = 21:24) +
  theme_custom +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 20, hjust = 1)
  )

## 9.2. Ideal Points ~ Activities ----------------------------------------

## Specifications: `ideal.main` on each activity measure, subset by period.
specifs.ideal.actv.pre.post <- build_specifs(
  data = d.brands.pre.post,
  x.vars = c(
    "legis.R_frac",
    "opsec.R_share.org_dollars",
    "opsec.R_cand_share.org_dollars",
    "yougov_aud.gender.Female",
    "zippia_empl.Ethnicity.White",
    "hrc_rating",
    "gd.Race / Ethnicity.Black or African American.avg_rating",
    "glassdoor_Rating.Diversity and Inclusion",
    "gd.Gender.Women.avg_rating",
    "gd.Sexual Orientation.LGBTQ+.avg_rating",
    "cdp_avg_score",
    # colnames(d.brands)[grepl("clm100_discl.Sub-indicator",colnames(d.brands))],
    # "clm100_discl.7.1 Metric assessment.a.Mar-22", # Brand commits to conduct all lobbying\nin line with Paris Agreement?
    # "clm100_policy.Organisation Score.March 2022", # Climate Action 100+ Organization Score (March 2022)
    "gjf.n_off.discr",
    "gjf.n_off.labor",
    "gjf.n_off.environ"
  ),
  y.vars = "ideal.main",
  # y.vars = paste0("ideal.",idealpt.vars),
  x.mids = d.brands.quad.mids,
  y.mids = list("ideal.main" = 0),
  se.cluster.vars = NA,
  subset.vars = "period",
  scale.y = TRUE,
  scale.x = TRUE
)

## Offense counts (`gjf.n_off*`) are log-transformed with LOG_OFFSET before
## fitting.
## NOTE(review): `debug = TRUE` is set only for this call in this section --
## confirm it is not a leftover.
results.ideal.actv.pre.post <- measure_alignments_robustly(
  data = d.brands.pre.post %>%
    mutate_at(vars(starts_with("gjf.n_off")), ~log(.x+LOG_OFFSET)),
  lm.specifs = specifs.ideal.actv.pre.post,
  lm.func = lm,
  debug = TRUE,
  quad.stats = FALSE,
  parallelize = FALSE,
  equiv.test = TRUE
)

### Visualise regression estimates ----
## Per-period estimates, one panel per activity measure. The plot is built at
## top level and is not assigned or saved.
results.ideal.actv.pre.post %>%
  filter(
    is.na(se.clus),
    !is.na(estimate),
    subset.var.val != "all",
    !grepl("clm100_discl.Sub-indicator (1|2|3|4|8|9)", x)
  ) %>%
  # mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  mutate(
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
      TRUE ~ "gray"
    )
  ) %>%
  ggplot(aes(y = estimate, x = subset.var.val, color = color)) +
  geom_hline(yintercept = 0, lty = 2) +
  # geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
  #                size=1, position = position_dodge(width=0.8)) +
  geom_linerange(
    aes(ymin = estimate - 1.96 * std.error,
        ymax = estimate + 1.96 * std.error),
    size = 2, position = position_dodge(width = 0.8)
  ) +
  geom_point(
    fill = "white", size = 3,
    position = position_dodge(width = 0.8)
  ) +
  facet_wrap(x ~ ., scales = "free") +
  scale_color_identity() +
  # scale_shape_manual(values = 21:24) +
  theme_custom +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 20, hjust = 1)
  )

# 10.) MEASURE ALIGNMENT HETEROGENEITIES ROBUSTLY ------------------------------

## 10.1. By Foreign vs. American HQ --------------------------------------------

## Specifications: `ideal.main` on selected stakeholder/activity measures,
## subset by whether the main HQ is in the U.S. (recoded to a text label).
specifs.ideal.het.hq <- build_specifs(
  data = d.brands %>%
    mutate(
      hq_main_in_US = ifelse(hq_main_in_US == 1, "American-based", "Foreign-based")
    ),
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP", "zi.pres.REP",
    "cong_house_dw_mean", "cong_sen_dw_mean",
    "stkhl.R",
    # "legis.R_frac",
    "opsec.R_share.org_dollars",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "hrc_rating"
  ),
  y.vars = "ideal.main",
  subset.vars = "hq_main_in_US",
  scale.y = TRUE,
  scale.x = TRUE
)
## Fit on brands with more than MIN_BIGRAM_COUNT in `ideal.main.n`, with the
## same recode of the subset variable as above.
results.ideal.het.hq <- measure_alignments_robustly(
  data = d.brands %>%
    filter(ideal.main.n > MIN_BIGRAM_COUNT) %>%
    mutate(
      hq_main_in_US = ifelse(hq_main_in_US == 1, "American-based", "Foreign-based")
    ),
  lm.specifs = specifs.ideal.het.hq,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE,
  parallel.cores = 12
)
#### match up with exact original results
## Keep only the subgroup rows from this run; the full-sample ("all") rows are
## taken from results.ideal.stkhl / results.ideal.actv instead. Columns not
## selected from those two tables are filled with NA by bind_rows().
results.ideal.het.hq <- bind_rows(
  results.ideal.het.hq %>%
    filter(subset.var.val != "all"),
  results.ideal.stkhl %>%
    filter(x %in% results.ideal.het.hq$x) %>%
    mutate(subset.var.val = "all") %>%
    select(x, y, subset.var.val, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit),
  results.ideal.actv %>%
    filter(x %in% results.ideal.het.hq$x) %>%
    mutate(subset.var.val = "all") %>%
    select(x, y, subset.var.val, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
)

### Visualise regression estimates ----
## One facet per explanatory measure, one row per HQ group plus the
## full-sample ("All") estimate, which is drawn with shape 23.
p.ideal.het.hq <- results.ideal.het.hq %>%
  filter(is.na(se.clus), !is.na(estimate)) %>%
  mutate(subset.var.val = gsub("all","All",subset.var.val)) %>%
  mutate(shape = ifelse(subset.var.val == "All", 23, 16)) %>%
  mutate(x.name = sanitize_var(x, add_category = TRUE, multi_line = TRUE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = TRUE)) %>%
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ggplot(aes(y=subset.var.val, x=estimate, color=color, shape=shape)) +
  geom_vline(xintercept=0, lty=2) +
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
                 size=0.5) +
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error),
                 size=1) +
  geom_point(fill="white", size=3) +
  facet_grid(x.name ~ ., scales = "free", space = "free_y") +
  scale_x_continuous(name = TeX("Coefficient estimate within brand group"),
                     limits = c(-1, 1)) +
  ## The axis shows HQ location groups; the previous title ("Number of firm
  ## employees") was copied from the employee-count figure.
  scale_y_discrete(name = "Location of main HQ") +
  geom_text(data = data.frame(x.name = "Workplace Rating:\nLGBTQ+ Equality Score\n(HRC)"),
            x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
            hjust=1, vjust=-1, inherit.aes=F)  +
  scale_color_identity() +
  scale_fill_identity() +
  scale_shape_identity() +
  ## Caption: share and count of U.S.-headquartered brands, rounded to one
  ## decimal and ignoring missing values so a single NA does not print "NA%".
  labs(caption = paste0(round(mean(d.brands$hq_main_in_US, na.rm = TRUE) * 100, 1),
                        "% (n=", sum(d.brands$hq_main_in_US, na.rm = TRUE),
                        ") brands with main HQ in U.S.")) +
  theme_custom +
  theme(legend.position="top",
        strip.text.y = element_text(angle=0))
ggsave_v(p.ideal.het.hq,
         filename = "figures/reg/het_hq.pdf",
         width=8, height=8)

## 10.2. By Number of Employees ------------------------------------------------

## Specifications: `ideal.main` on selected stakeholder/activity measures,
## subset by the `num_empl.2` grouping.
specifs.ideal.het.empl <- build_specifs(
  data = d.brands,
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP", "zi.pres.REP",
    "cong_house_dw_mean", "cong_sen_dw_mean",
    "stkhl.R",
    # "legis.R_frac",
    "opsec.R_share.org_dollars",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "hrc_rating"
  ),
  y.vars = "ideal.main",
  subset.vars = "num_empl.2",
  scale.y = TRUE,
  scale.x = TRUE
)
## Fit on brands with more than MIN_BIGRAM_COUNT in `ideal.main.n`.
results.ideal.het.empl <- measure_alignments_robustly(
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  lm.specifs = specifs.ideal.het.empl,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE,
  parallel.cores = 12
)
#### match up with exact original results
## Keep only the subgroup rows from this run; the full-sample ("all") rows are
## taken from results.ideal.stkhl / results.ideal.actv instead. Columns not
## selected from those two tables are filled with NA by bind_rows().
results.ideal.het.empl <- bind_rows(
  results.ideal.het.empl %>%
    filter(subset.var.val != "all"),
  results.ideal.stkhl %>%
    filter(x %in% results.ideal.het.empl$x) %>%
    mutate(subset.var.val = "all") %>%
    select(x, y, subset.var.val, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit),
  results.ideal.actv %>%
    filter(x %in% results.ideal.het.empl$x) %>%
    mutate(subset.var.val = "all") %>%
    select(x, y, subset.var.val, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
)

### Visualise regression estimates ----
## One facet per explanatory measure, one row per employee-count group plus
## the full-sample ("All") estimate, which is drawn with shape 23.
p.ideal.het.empl <- results.ideal.het.empl %>%
  filter(is.na(se.clus), !is.na(estimate)) %>%
  mutate(
    subset.var.val = stringr::str_to_title(subset.var.val),
    shape = ifelse(subset.var.val == "All", 23, 16),
    x.name = sanitize_var(x, add_category = TRUE, multi_line = TRUE),
    x.cat = categorize_var(x, multi_line = TRUE),
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
      TRUE ~ "gray"
    )
  ) %>%
  ggplot(aes(y = subset.var.val, x = estimate, color = color, shape = shape)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(
    aes(xmin = estimate - p.value.adj.tcrit * std.error,
        xmax = estimate + p.value.adj.tcrit * std.error),
    size = 0.5
  ) +
  geom_linerange(
    aes(xmin = estimate - 1.96 * std.error,
        xmax = estimate + 1.96 * std.error),
    size = 1
  ) +
  geom_point(fill = "white", size = 3) +
  facet_grid(x.name ~ ., scales = "free", space = "free_y") +
  scale_x_continuous(
    name = TeX("Coefficient estimate within brand group"),
    limits = c(-1, 1)
  ) +
  scale_y_discrete(name = "Number of firm employees") +
  ## Direction annotation, drawn only in the facet named in `data`.
  geom_text(
    data = data.frame(x.name = "Workplace Rating:\nLGBTQ+ Equality Score\n(HRC)"),
    x = Inf, y = 0, size = 3,
    label = TeX("More R. brand signal $\\rightarrow$"),
    hjust = 1, vjust = -1, inherit.aes = F
  ) +
  scale_color_identity() +
  scale_fill_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(
    legend.position = "top",
    strip.text.y = element_text(angle = 0)
  )
ggsave_v(
  p.ideal.het.empl,
  filename = "figures/reg/het_empl.pdf",
  width = 8, height = 8
)

## 10.3. By Industry -----------------------------------------------------------

## Specifications: `ideal.main` on selected stakeholder/activity measures,
## subset by the `yougov_brand_category.2` grouping.
## NOTE(review): here the MIN_BIGRAM_COUNT filter is applied to the data given
## to build_specifs() while the regressions below run on unfiltered
## `d.brands`; 10.1 and 10.2 do the opposite -- confirm which is intended.
specifs.ideal.het.ind <- build_specifs(
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP", "zi.pres.REP",
    "cong_house_dw_mean", "cong_sen_dw_mean",
    # "stkhl.R",
    # "legis.R_frac",
    "gjf.n_off.environ",
    "opsec.R_share.org_dollars",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "hrc_rating"
  ),
  y.vars = "ideal.main",
  subset.vars = "yougov_brand_category.2",
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.het.ind <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.het.ind,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE,
  parallel.cores = 12
)
#### match up with exact original results
## Keep only the subgroup rows from this run; the full-sample ("all") rows are
## taken from results.ideal.stkhl / results.ideal.actv instead. Columns not
## selected from those two tables are filled with NA by bind_rows().
results.ideal.het.ind <- bind_rows(
  results.ideal.het.ind %>%
    filter(subset.var.val != "all"),
  results.ideal.stkhl %>%
    filter(x %in% results.ideal.het.ind$x) %>%
    mutate(subset.var.val = "all") %>%
    select(x, y, subset.var.val, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit),
  results.ideal.actv %>%
    filter(x %in% results.ideal.het.ind$x) %>%
    mutate(subset.var.val = "all") %>%
    select(x, y, subset.var.val, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
)

### Visualise regression estimates ----
## One facet per explanatory measure, one row per industry plus the
## full-sample ("All") estimate, which is drawn with shape 23.
p.ideal.het.ind <- results.ideal.het.ind %>%
  filter(is.na(se.clus), !is.na(estimate)) %>%
  mutate(
    subset.var.val = stringr::str_to_title(subset.var.val),
    shape = ifelse(subset.var.val == "All", 23, 16),
    x.name = sanitize_var(x, add_category = TRUE, multi_line = TRUE),
    x.cat = categorize_var(x, multi_line = TRUE),
    color = case_when(
      p.value < p.value.adj.alpha ~ "black",
      p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
      TRUE ~ "gray"
    )
  ) %>%
  ggplot(aes(y = subset.var.val, x = estimate, color = color, shape = shape)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(
    aes(xmin = estimate - p.value.adj.tcrit * std.error,
        xmax = estimate + p.value.adj.tcrit * std.error),
    size = 0.5
  ) +
  geom_linerange(
    aes(xmin = estimate - 1.96 * std.error,
        xmax = estimate + 1.96 * std.error),
    size = 1
  ) +
  geom_point(fill = "white", size = 3) +
  facet_grid(x.name ~ ., scales = "free", space = "free_y") +
  scale_x_continuous(
    name = TeX("Coefficient estimate within industry"),
    limits = c(-1, 1)
  ) +
  scale_y_discrete(name = "Industries") +
  ## Direction annotation, drawn only in the facet named in `data`.
  geom_text(
    data = data.frame(x.name = "Workplace Rating:\nLGBTQ+ Equality Score\n(HRC)"),
    x = Inf, y = 0, size = 3,
    label = TeX("More R. brand signal $\\rightarrow$"),
    hjust = 1, vjust = -1, inherit.aes = F
  ) +
  scale_color_identity() +
  scale_fill_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(
    legend.position = "top",
    strip.text.y = element_text(angle = 0)
  )
ggsave_v(
  p.ideal.het.ind,
  filename = "figures/reg/het_ind.pdf",
  width = 8, height = 11
)

## 10.4. By Follower Count -----------------------------------------------------

# Follower-count heterogeneity: outcome `ideal.main`, the covariates listed
# below, and `tw_followers.2` as the subsetting variable.
specifs.ideal.het.foll <- build_specifs(
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R",
    "legis.R_frac",
    "opsec.R_share.org_dollars",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "hrc_rating"
  ),
  y.vars = c("ideal.main"),
  subset.vars = c("tw_followers.2"),
  scale.y = TRUE,
  scale.x = TRUE
)

# Fit every specification with lm(); p-value adjustment is grouped by
# outcome and subset value.
results.ideal.het.foll <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.het.foll,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE,
  parallel.cores = 12
)
### Visualise regression estimates ----
# Coefficient plot of the estimates within follower-count groups: one facet
# row per covariate (x.name), one y-axis entry per group (subset.var.val).
p.ideal.het.foll <- results.ideal.het.foll %>%
  # keep rows with no clustered-SE label and a non-missing estimate
  filter(is.na(se.clus), !is.na(estimate)) %>%
  mutate(subset.var.val = stringr::str_to_title(subset.var.val)) %>%
  # point shape 23 for the pooled "All" row, 16 for every other row
  mutate(shape = ifelse(subset.var.val == "All", 23, 16)) %>%
  mutate(x.name = sanitize_var(x, add_category = TRUE, multi_line = TRUE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = TRUE)) %>%
  # black: below the adjusted alpha; gray25: below the global alpha only
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ggplot(aes(y=subset.var.val, x=estimate, color=color, shape=shape)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- p.value.adj.tcrit * std.error
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
                 size=0.5) +
  # thick range: estimate +/- 1.96 * std.error
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error),
                 size=1) +
  geom_point(fill="white", size=3) +
  facet_grid(x.name ~ ., scales = "free", space = "free_y") +
  scale_x_continuous(name = TeX("Coefficient estimate within brand group"),
                     limits = c(-1, 1)) +
  scale_y_discrete(name = "Number of Twitter Followers") +
  geom_text(data = data.frame(x.name = "Workplace Rating:\nLGBTQ+ Equality Score\n(HRC)"),
            x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
            hjust=1, vjust=-1, inherit.aes=FALSE)  +
  scale_color_identity() +
  scale_fill_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(legend.position="top",
        strip.text.y = element_text(angle=0))
# Save the follower-count plot. The original call passed `p.ideal.het.ind`,
# which wrote the industry figure into het_foll.pdf.
ggsave_v(p.ideal.het.foll,
         filename = "figures/reg/het_foll.pdf",
         width=8, height=11)

## 10.4. By Tweet Count --------------------------------------------------------

# Tweet-count heterogeneity: outcome `ideal.main`, the covariates listed
# below, and `tw_count.2` as the subsetting variable.
specifs.ideal.het.tw <- build_specifs(
  data = d.brands %>% filter(ideal.main.n > MIN_BIGRAM_COUNT),
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R",
    "legis.R_frac",
    "opsec.R_share.org_dollars",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "hrc_rating"
  ),
  y.vars = c("ideal.main"),
  subset.vars = c("tw_count.2"),
  scale.y = TRUE,
  scale.x = TRUE
)

# Fit every specification with lm(); p-value adjustment is grouped by
# outcome and subset value.
results.ideal.het.tw <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.het.tw,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE,
  parallel.cores = 12
)
### Visualise regression estimates ----
# Coefficient plot of the estimates within tweet-count groups: one facet row
# per covariate (x.name), one y-axis entry per group (subset.var.val).
p.ideal.het.tw <- results.ideal.het.tw %>%
  # keep rows with no clustered-SE label and a non-missing estimate
  filter(is.na(se.clus), !is.na(estimate)) %>%
  mutate(subset.var.val = stringr::str_to_title(subset.var.val)) %>%
  # point shape 23 for the pooled "All" row, 16 for every other row
  mutate(shape = ifelse(subset.var.val == "All", 23, 16)) %>%
  mutate(x.name = sanitize_var(x, add_category = TRUE, multi_line = TRUE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = TRUE)) %>%
  # black: below the adjusted alpha; gray25: below the global alpha only
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ggplot(aes(y=subset.var.val, x=estimate, color=color, shape=shape)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- p.value.adj.tcrit * std.error
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
                 size=0.5) +
  # thick range: estimate +/- 1.96 * std.error
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error),
                 size=1) +
  geom_point(fill="white", size=3) +
  facet_grid(x.name ~ ., scales = "free", space = "free_y") +
  scale_x_continuous(name = TeX("Coefficient estimate within brand group"),
                     limits = c(-1, 1)) +
  scale_y_discrete(name = "Number of Tweets in Period") +
  geom_text(data = data.frame(x.name = "Workplace Rating:\nLGBTQ+ Equality Score\n(HRC)"),
            x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
            hjust=1, vjust=-1, inherit.aes=FALSE)  +
  scale_color_identity() +
  scale_fill_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(legend.position="top",
        strip.text.y = element_text(angle=0))
# Save the tweet-count plot. The original call passed `p.ideal.het.ind`,
# which wrote the industry figure into het_tw.pdf.
ggsave_v(p.ideal.het.tw,
         filename = "figures/reg/het_tw.pdf",
         width=8, height=11)


## 10.5. By Partisan Phrases ---------------------------------------------------

# Partisan-phrase-count heterogeneity: outcome `ideal.main`, the covariates
# listed below, and `ideal.main.n` as the subsetting variable. Unlike the
# other heterogeneity sections, the specification data are not filtered on
# MIN_BIGRAM_COUNT here.
specifs.ideal.het.n <- build_specifs(
  data = d.brands,
  x.vars = c(
    "R_don_share",
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R",
    "legis.R_frac",
    "opsec.R_share.org_dollars",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "hrc_rating"
  ),
  y.vars = c("ideal.main"),
  subset.vars = c("ideal.main.n"),
  scale.y = TRUE,
  scale.x = TRUE
)

# Fit every specification with lm(); p-value adjustment is grouped by
# outcome and subset value.
results.ideal.het.n <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.het.n,
  lm.func = lm,
  debug = FALSE,
  p.adjust.group.vars = c("y", "subset.var.val"),
  parallelize = TRUE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE,
  parallel.cores = 12
)
### Visualise regression estimates ----
# Coefficient plot of the estimates within partisan-phrase-count groups: one
# facet row per covariate (x.name), one y-axis entry per group
# (subset.var.val).
p.ideal.het.n <- results.ideal.het.n %>%
  # keep rows with no clustered-SE label and a non-missing estimate
  filter(is.na(se.clus), !is.na(estimate)) %>%
  mutate(subset.var.val = stringr::str_to_title(subset.var.val)) %>%
  # point shape 23 for the pooled "All" row, 16 for every other row
  mutate(shape = ifelse(subset.var.val == "All", 23, 16)) %>%
  mutate(x.name = sanitize_var(x, add_category = TRUE, multi_line = TRUE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = TRUE)) %>%
  # black: below the adjusted alpha; gray25: below the global alpha only
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  ggplot(aes(y=subset.var.val, x=estimate, color=color, shape=shape)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- p.value.adj.tcrit * std.error
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error),
                 size=0.5) +
  # thick range: estimate +/- 1.96 * std.error
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error),
                 size=1) +
  geom_point(fill="white", size=3) +
  facet_grid(x.name ~ ., scales = "free", space = "free_y") +
  scale_x_continuous(name = TeX("Coefficient estimate within brand group"),
                     limits = c(-1, 1)) +
  # The subsetting variable here is ideal.main.n, not the tweet count; the
  # original axis title was copied over from the tweet-count plot.
  scale_y_discrete(name = "Number of Partisan Phrases") +
  geom_text(data = data.frame(x.name = "Workplace Rating:\nLGBTQ+ Equality Score\n(HRC)"),
            x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
            hjust=1, vjust=-1, inherit.aes=FALSE)  +
  scale_color_identity() +
  scale_fill_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(legend.position="top",
        strip.text.y = element_text(angle=0))
# Save the partisan-phrase plot. The original call passed `p.ideal.het.ind`,
# which wrote the industry figure into het_n.pdf.
ggsave_v(p.ideal.het.n,
         filename = "figures/reg/het_n.pdf",
         width=8, height=11)

# 11.) MEASURE SELECTION ROBUSTLY ----------------------------------------------

## 11.1. I(Outcome) ~ Covariates ------------------------------------------------

# Selection analysis: regress the number of partisan phrases (`ideal.main.n`,
# left unscaled) on each standardized covariate. Column families are pulled
# from d.brands by regular expression.
specifs.slxn.ideal <- build_specifs(
  data = d.brands,
  x.vars = c(
    grep("^R_don_share", colnames(d.brands), value = TRUE),
    "twitter.foll_ideo_slant",
    "hq_pres.REP",
    "zi.pres.REP",
    "sg.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    "stkhl.R",
    "legis.R_frac",
    "opsec.R_share.org_dollars",
    "opsec.R_cand_share.org_dollars",
    "hrc_rating",
    grep("^(yougov_aud|yougov_pct).*(educ|gender|Income|empl|region)",
         colnames(d.brands), value = TRUE),
    grep("^(zippia_empl).*(Genders|Ethn|Degrees|Ages)",
         colnames(d.brands), value = TRUE),
    "gd.Race / Ethnicity.Black or African American.avg_rating",
    "glassdoor_Rating.Diversity and Inclusion",
    "gd.Gender.Women.avg_rating",
    "gd.Sexual Orientation.LGBTQ+.avg_rating",
    "cdp_avg_score",
    "clm100_policy.Organisation Score.March 2022",
    "rev_mil.final.log",
    "num_empl.final.log",
    "tw_count",
    "tw_followers"
  ),
  # y.vars = c("ideal.main.nonzero"),
  y.vars = c("ideal.main.n"),
  se.cluster.vars = NA,
  scale.y = FALSE,
  scale.x = TRUE
)

# Fit with a negative binomial GLM. In the data handed to the fit, a missing
# phrase count is recoded to 0 and `ideal.main.nonzero` flags brands whose
# count was observed.
results.slxn.ideal <- measure_alignments_robustly(
  data = d.brands %>%
    mutate(
      ideal.main.nonzero = ifelse(is.na(ideal.main.n), 0, 1),
      # tw_count = log(tw_count),
      # tw_followers = log(tw_followers),
      ideal.main.n = ifelse(is.na(ideal.main.n), 0, ideal.main.n)
    ),
  lm.specifs = specifs.slxn.ideal,
  # lm.func = lm,
  lm.func = MASS::glm.nb,
  debug = FALSE,
  quad.stats = FALSE,
  parallelize = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

### Visualise regression estimates ----
# Coefficient plot for the selection model: one row per covariate, faceted by
# covariate category (x.cat).
p.slxn.ideal <- results.slxn.ideal %>%
  filter(is.na(se.clus)) %>%
  # mutate(color = ifelse(p.value < 0.05, "black", "gray")) %>%
  # mutate(color = ifelse(p.value < p.value.adj.alpha, "black", "gray")) %>%
  # black: below the adjusted alpha; gray25: below the global alpha only
  mutate(color = case_when(p.value < p.value.adj.alpha ~ "black",
                           p.value < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  mutate(x.name = sanitize_var(x, add_category = TRUE)) %>%
  mutate(x.cat = categorize_var(x, multi_line = FALSE)) %>%
  # rowwise so that strsplit()[[1]] picks the pieces of each row's own name
  rowwise() %>%
  # second and third dot-separated components of the raw variable name
  mutate(var_type = strsplit(x, split="\\.")[[1]][2],
         var_val = strsplit(x, split="\\.")[[1]][3]) %>%
  mutate(var_val = gsub("_"," ",var_val),
         var_val = gsub("\\(.*","",var_val),
         # display label per covariate
         x.name = case_when(
           grepl("pct_recog", x, ignore.case=TRUE) ~ "% Recognition",
           grepl("pct_pos", x, ignore.case=TRUE) ~ "% Pos. Opinion",
           grepl("pct_neg", x, ignore.case=TRUE) ~ "% Neg. Opinion",
           grepl("(gd|glassdoor|hrc|Firm)", x.name, ignore.case = TRUE) ~ paste0(gsub(".*\\:", "", x.name)),
           grepl("(zippia|yougov_aud)", x, ignore.case=TRUE) ~ var_val,
           TRUE ~ x.name
         ),
         # facet category per covariate
         x.cat = case_when(
           grepl("pct", x, ignore.case=TRUE) ~ "Firm",
           grepl("(tw_foll|tw_count)", x, ignore.case=TRUE) ~ "Firm",
           grepl("zippia", x, ignore.case=TRUE) & grepl("degrees", x, ignore.case=TRUE) ~ "Employees: Educ.",
           grepl("zippia", x, ignore.case=TRUE) & grepl("ethn", x, ignore.case=TRUE) ~ "Employees: Race",
           grepl("zippia", x, ignore.case=TRUE) & grepl("age", x, ignore.case=TRUE) ~ "Employees: Age",
           grepl("zippia", x, ignore.case=TRUE) & grepl("gender", x, ignore.case=TRUE) ~ "Employees: Gender",
           grepl("yougov_aud", x, ignore.case=TRUE) & grepl("empl", x, ignore.case=TRUE) ~ "Consumers: Employment",
           grepl("yougov_aud", x, ignore.case=TRUE) & grepl("income", x, ignore.case=TRUE) ~ "Consumers: Income",
           # yougov_aud covariates are labelled "Consumers" everywhere else
           # in this mapping; the original "Employees: Region" label was a
           # copy-paste slip.
           grepl("yougov_aud", x, ignore.case=TRUE) & grepl("region", x, ignore.case=TRUE) ~ "Consumers: Region",
           grepl("yougov_aud", x, ignore.case=TRUE) & grepl("gender", x, ignore.case=TRUE) ~ "Consumers: Gender",
           # grepl("pct", x, ignore.case=T) ~ "Overall",
           TRUE ~ x.cat
         )) %>%
  group_by(x.cat) %>%
  arrange(-estimate) %>%
  # factor levels follow the current row order
  mutate(x.name = as_factor(x.name)) %>%
  ggplot(aes(y=x.name, x=estimate, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- p.value.adj.tcrit * std.error
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), size=1) +
  # thick range: estimate +/- 1.96 * std.error
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), size=2) +
  geom_point(size=3) +
  facet_grid(x.cat ~ ., scales = "free_y", space = "free_y") +
  scale_color_identity() +
  xlab("Standardized negative binomial coefficient estimate") +
  ylab("") +
  geom_text(data = data.frame(x.cat = "Workplace Environment"),
            x=-Inf, y=0, size=3, label=TeX("$\\leftarrow$ Fewer brand signals"),
            hjust=-0.1, vjust=-1, inherit.aes=FALSE) +
  theme_custom_vertpanel
# Write the selection plot to disk.
ggsave_v(p.slxn.ideal,
         filename = "figures/reg/slxn_ideal.pdf",
         width = 10, height = 11)

# 12.) MEASURE MISSINGNESS ROBUSTLY --------------------------------------------

## 12.1. I(Covariate) ~ Other Covariates ----------------------------------------

# Quick check: regress an indicator for a missing `hrc_rating` on `ideal.main`
# and show the model summary. The result is not stored.
summary(lm(hrc_rating.missing ~ ideal.main,
           mutate(d.brands, hrc_rating.missing = is.na(hrc_rating))))

# Missingness analysis: each covariate below is an outcome (with
# `bin.y.missing = TRUE`), and `ideal.main` / `ideal.main.n` are the
# predictors.
specifs.missing.x <- build_specifs(
  data = d.brands,
  y.vars = c(
    grep("^R_don_share", colnames(d.brands), value = TRUE),
    "twitter.foll_ideo_slant",
    "sl.Rep_Pct.2017_02",
    "sl.Rep_Pct.2022_10",
    "hq_pres.REP",
    "zi.pres.REP",
    "sg.pres.REP",
    "cong_house_dw_mean",
    "cong_sen_dw_mean",
    # "legis.R_frac",
    "opsec.R_share.org_dollars",
    "opsec.R_cand_share.org_dollars",
    "hrc_rating",
    "zippia_empl.Ages.18-20",
    "glassdoor_Rating.Diversity and Inclusion",
    "cdp_avg_score",
    "clm100_policy.Organisation Score.March 2022"
    # "rev_mil.final.log",
    # "num_empl.final.log"
  ),
  x.vars = c("ideal.main", "ideal.main.n"),
  se.cluster.vars = NA,
  bin.y.missing = TRUE,
  scale.y = TRUE,
  scale.x = TRUE
)

# Run the missingness models; p-value adjustment is grouped by predictor.
results.missing.x <- analyze_missingness(
  data = d.brands,
  p.adjust.group.vars = c("x"),
  lm.specifs = specifs.missing.x,
  debug = FALSE
)

### Visualise regression estimates ----
# Coefficient plot of the missingness models, restricted to the predictor
# `ideal.main.n`: one row per covariate whose missingness is modelled,
# faceted by covariate category (y.cat).
p.missing.x <- results.missing.x %>%
  filter(x == "ideal.main.n") %>%
  # black: below the adjusted alpha; gray25: below the global alpha only
  mutate(color = case_when(y.x.missing.pval < y.x.missing.p.value.adj.alpha ~ "black",
                           y.x.missing.pval < GLOBAL_ALPHA_THRESHOLD ~ "gray25",
                           TRUE ~ "gray")) %>%
  mutate(y.name = sanitize_var(y, add_category = F)) %>%
  rowwise() %>%
  # Facet category; the first matching pattern wins. There is no default
  # branch, so a covariate matching none of the patterns gets NA.
  mutate(y.cat = case_when(
    grepl("(R_don_share)", y, ignore.case=T) ~ "FEC",
    grepl("(twitter|sl)", y, ignore.case=T) ~ "Twitter",
    grepl("(gd|Glassdoor|empl)", y, ignore.case=T) ~ "Other",
    grepl("(hq|loc|pres|cong)", y, ignore.case=T) ~ "Geographic",
    grepl("(opsec|legis)", y, ignore.case=T) ~ "Political Activities",
    grepl("(HRC|cdp|clim)", y.name, ignore.case=T) ~ "Climate/DEI Ratings"
  )) %>%
  # shorten display labels
  mutate(y.name = gsub("R. Direction of HQ", "", y.name)) %>%
  mutate(y.name = gsub(".*Candidates", "Candidate PACs", y.name)) %>%
  mutate(y.name = gsub(".*Groups", "Outside Group PACs", y.name)) %>%
  mutate(y.name = case_when(
    grepl("(gd|Glassdoor)", y.name, ignore.case=T) ~ "Glassdoor Employee Ratings",
    grepl("empl", y.name, ignore.case=T) ~ "Zippia Employee Demographics",
    TRUE ~ y.name
  )) %>%
  ### remove yougov covariates, take those for granted
  # (also drops variables with "log" in the name and rows without an estimate)
  filter(!grepl("yougov|log", y), !is.na(y.x.missing.est)) %>%
  group_by(y.cat) %>%
  arrange(-y.x.missing.est) %>%
  # factor levels follow the current row order
  mutate(y.name = as_factor(y.name)) %>%
  # mutate(y.name = as_factor(coalesce(y.name, y))) %>%
  ggplot(aes(y=y.name, x=y.x.missing.est, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- adjusted critical value * standard error
  geom_linerange(aes(xmin=y.x.missing.est-y.x.missing.p.value.adj.tcrit*y.x.missing.std.err, xmax=y.x.missing.est+y.x.missing.p.value.adj.tcrit*y.x.missing.std.err), size=1) + 
  # thick range: estimate +/- 1.96 * standard error
  geom_linerange(aes(xmin=y.x.missing.est-1.96*y.x.missing.std.err, xmax=y.x.missing.est+1.96*y.x.missing.std.err), size=2) + 
  geom_point(size=3) +
  # facet_grid(y.cat ~ x, scales = "free", space = "free") +
  facet_grid(y.cat ~ ., scales = "free", space = "free") +
  scale_color_identity() +
  scale_x_continuous(name = TeX("Coefficient estimate")) + 
  ylab("Missingness in....") +
  # direction annotation, supplied with its own one-row data frame keyed on
  # y.cat = "Twitter"
  geom_text(data = data.frame(y.cat = "Twitter"),
            x=Inf, y=2.25, size=3, label=TeX("R-leaning brand signal predicts more missingness $\\rightarrow$ "),
            hjust=1, vjust=0, inherit.aes=F) +
  theme_custom_vertpanel
# Write the missingness plot to disk.
ggsave_v(p.missing.x, 
         filename = "figures/reg/missing_x.pdf",
         width = 10, height = 7)

# 13.) MEASURE INDUSTRY DISTRIBUTIONS -------------------------------------


### Visualise regression estimates ----
# Regress the standardized `ideal.main` on industry (`yougov_brand_category`)
# with lm_robust() and plot the tidied coefficients with hand-computed
# rank-based thresholds (labelled "BHq").
p.ideal.ind <- d.brands %>%
  lm_robust(scale(ideal.main) ~ yougov_brand_category, data = .) %>%
  tidy() %>%
  # rank terms by raw p-value; k and r are computed over every tidied term,
  # before the filter further down removes some of them
  arrange(p.value) %>%
  # NOTE(review): each p-value is compared only with its own threshold
  # r*alpha/k; the standard Benjamini-Hochberg step-up would also reject every
  # term ranked below the largest passing rank -- confirm this is intended.
  mutate(p.value.adj.method = "BHq",
         k = n(), ## number of hypotheses
         r = 1:n(), ## rank of p-values
         p.value.adj.alpha = (r*GLOBAL_ALPHA_THRESHOLD)/k, ## stepped-up thresholds
         p.value.adj.sig = p.value < p.value.adj.alpha, ## stepped-up hypothesis tests
         p.value.adj.zcrit = qnorm(1 - (p.value.adj.alpha)/2), ## new critical values for asymptotic CIs
  ) %>%
  # strip the factor-name prefix so only the industry label remains
  mutate(term=gsub("yougov_brand_category","",term)) %>%
  # drop terms whose label matches comm / media / network
  filter(!grepl("(comm|media|network)", term)) %>%
  mutate(term = str_to_title(term)) %>%  
  # order the y axis by estimate (factor levels follow row order)
  arrange(-estimate) %>%
  mutate(term = as_factor(term)) %>%
  mutate(color = ifelse(p.value.adj.sig, "black", "gray")) %>%
  ggplot(aes(y=term, x=estimate, color=color)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- p.value.adj.zcrit * std.error
  geom_linerange(aes(xmin=estimate-p.value.adj.zcrit*std.error, xmax=estimate+p.value.adj.zcrit*std.error), 
                 size=1, position = position_dodge(width=0.8)) + 
  # thick range: estimate +/- 1.96 * std.error
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), 
                 size=2, position = position_dodge(width=0.8)) + 
  geom_point(fill="white", size=3, position = position_dodge(width=0.8)) +
  scale_x_continuous(name = TeX("Coefficient estimate")) +
  scale_y_discrete(name = "") +
  scale_color_identity() +
  # NOTE(review): no shape aesthetic is mapped in this plot, so this scale
  # looks unused -- confirm before removing.
  scale_shape_manual(values = 21:24) +
  # NOTE(review): the label presumes Airlines is the reference level of
  # yougov_brand_category -- verify against the factor levels.
  geom_text(data = data.frame(x=-Inf), 
            x=-Inf, y=0, size=3, label=TeX("$\\leftarrow$ Industry more Dem-sounding relative to Airlines"), 
            hjust=-0.1, vjust=-1, inherit.aes=F) + 
  theme_custom +
  theme(legend.position="none",
        strip.text.y=element_text(angle=0))
# Write the industry-distribution plot to disk.
ggsave_v(p.ideal.ind, 
         filename = "figures/reg/indus.pdf",
         width=10, height=4)

# 14.) STUDY STANDARD ERRORS ----------------------------------------------

## 14.1. With Clustering --------------------------------------------------

# Main covariates with standard errors clustered by industry and by parent
# firm. A missing `parent` is filled with the brand's own `yougov_name`.
specifs.ideal.se.clus <- build_specifs(
  data = d.brands %>%
    mutate(parent = ifelse(is.na(parent), yougov_name, parent)),
  y.vars = c("ideal.main"),
  x.vars = c("R_don_share", "twitter.foll_ideo_slant", "hq_pres.REP",
             "zi.pres.REP", "cong_house_dw_mean", "cong_sen_dw_mean",
             "stkhl.R"),
  se.cluster.vars = c("yougov_brand_category.2", "parent"),
  scale.y = TRUE, scale.x = TRUE
)
# Pass the same parent-filled data to the fit. The original passed the raw
# `d.brands`, so the `parent` cluster variable still contained NA at
# estimation time; the other sections that derive a column (14.2.1, 14.3,
# 14.4) apply the derivation to both calls.
results.ideal.se.clus <- measure_alignments_robustly(
  data = d.brands %>%
    mutate(parent = ifelse(is.na(parent), yougov_name, parent)),
  lm.specifs = specifs.ideal.se.clus, lm.func = lm,
  parallelize = FALSE, quad.stats = FALSE, equiv.test = FALSE, zaminfl = FALSE
)

## 14.2. Weighting by Ideal N ---------------------------------------------

# Main covariates, weighting observations by the number of partisan phrases
# (`ideal.main.n`).
specifs.ideal.se.n <- build_specifs(
  data = d.brands,
  y.vars = c("ideal.main"),
  x.vars = c(
    "R_don_share", "twitter.foll_ideo_slant", "hq_pres.REP", "zi.pres.REP",
    "cong_house_dw_mean", "cong_sen_dw_mean", "stkhl.R"
  ),
  weight.vars = c("ideal.main.n"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.se.n <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.se.n,
  lm.func = lm,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

## 14.2.1 Weighting by Ideal Model SE ------------------------------------------

# Per-brand standard deviation of `slant` across the bootstrap draws.
d.brands.ideal.mdl.se <-
  scaled_text_alt$par_boot_mdl$brands_bigrams_pois_boots %>%
  group_by(yougov_name) %>%
  summarise(ideal.mdl.se = sd(slant, na.rm = TRUE), .groups = "drop")

# Outcome `ideal.mdl`, weighted by the inverse of that bootstrap SD. The
# joined and weighted data are rebuilt identically for both calls.
specifs.ideal.mdl.se <- build_specifs(
  data = d.brands %>%
    left_join(d.brands.ideal.mdl.se, by = "yougov_name") %>%
    mutate(ideal.mdl.se.inv = 1 / ideal.mdl.se),
  y.vars = c("ideal.mdl"),
  x.vars = c(
    "R_don_share", "twitter.foll_ideo_slant", "hq_pres.REP", "zi.pres.REP",
    "cong_house_dw_mean", "cong_sen_dw_mean", "stkhl.R"
  ),
  weight.vars = c("ideal.mdl.se.inv"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.mdl.se <- measure_alignments_robustly(
  data = d.brands %>%
    left_join(d.brands.ideal.mdl.se, by = "yougov_name") %>%
    mutate(ideal.mdl.se.inv = 1 / ideal.mdl.se),
  lm.specifs = specifs.ideal.mdl.se,
  lm.func = lm,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

## 14.3. Uncertainty in FEC donations ------------------------------------------

# `R_don_share`, weighted by `don_doll`: the FEC D+R sum, falling back to the
# OpenSecrets individual R+D sum when the FEC sum is NA.
specifs.ideal.uncert.fec <- build_specifs(
  data = d.brands %>%
    mutate(don_doll = coalesce(FEC.don_D + FEC.don_R,
                               opsec.R.indiv_dollars + opsec.D.indiv_dollars)),
  x.vars = c("R_don_share"),
  y.vars = c("ideal.main"),
  weight.vars = c("don_doll"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.fec <- measure_alignments_robustly(
  data = d.brands %>%
    mutate(don_doll = coalesce(FEC.don_D + FEC.don_R,
                               opsec.R.indiv_dollars + opsec.D.indiv_dollars)),
  lm.specifs = specifs.ideal.uncert.fec,
  lm.func = lm,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

## 14.4. Uncertainty in PAC contributions --------------------------------------

# `opsec.R_cand_share.org_dollars`, weighted by `pac_doll`: the sum of the
# OpenSecrets R and D organisation dollars.
specifs.ideal.uncert.pac <- build_specifs(
  data = d.brands %>%
    mutate(pac_doll = opsec.R.org_dollars + opsec.D.org_dollars),
  x.vars = c("opsec.R_cand_share.org_dollars"),
  y.vars = c("ideal.main"),
  weight.vars = c("pac_doll"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.pac <- measure_alignments_robustly(
  data = d.brands %>%
    mutate(pac_doll = opsec.R.org_dollars + opsec.D.org_dollars),
  lm.specifs = specifs.ideal.uncert.pac,
  lm.func = lm,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

## 14.5. Uncertainty in lobbying -----------------------------------------------

# `legis.R_frac`, weighted by `legis.n`. The original specification data
# also derived a `don_doll` column (carried over from the FEC section) that
# no variable in this specification references; it is dropped so that the
# specification and the fit both use plain `d.brands`.
specifs.ideal.uncert.legis <- build_specifs(
  data = d.brands,
  x.vars = c("legis.R_frac"), y.vars = c("ideal.main"), weight.vars = c("legis.n"),
  scale.y = TRUE, scale.x = TRUE
)
results.ideal.uncert.legis <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.legis, lm.func = lm,
  debug = FALSE, parallelize = FALSE, quad.stats = FALSE, equiv.test = FALSE, zaminfl = FALSE
)

## 14.6. Uncertainty in Twitter followings -------------------------------------

# `twitter.foll_ideo_slant`, weighted by `twitter.foll_ideo_accts_n`.
specifs.ideal.uncert.tw <- build_specifs(
  data = d.brands,
  x.vars = c("twitter.foll_ideo_slant"),
  y.vars = c("ideal.main"),
  weight.vars = c("twitter.foll_ideo_accts_n"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.tw <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.tw,
  lm.func = lm,
  debug = FALSE,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)
## 14.7. Uncertainty in Glassdoor reviews --------------------------------------

# Each Glassdoor average rating is weighted by its matching `num_reviews`
# column.
specifs.ideal.uncert.gd_women <- build_specifs(
  data = d.brands,
  x.vars = c("gd.Gender.Women.avg_rating"),
  y.vars = c("ideal.main"),
  weight.vars = c("gd.Gender.Women.num_reviews"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.gd_women <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.gd_women,
  lm.func = lm,
  debug = FALSE,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)
#
specifs.ideal.uncert.gd_black <- build_specifs(
  data = d.brands,
  x.vars = c("gd.Race / Ethnicity.Black or African American.avg_rating"),
  y.vars = c("ideal.main"),
  weight.vars = c("gd.Race / Ethnicity.Black or African American.num_reviews"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.gd_black <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.gd_black,
  lm.func = lm,
  debug = FALSE,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)
#
specifs.ideal.uncert.gd_lgbtq <- build_specifs(
  data = d.brands,
  x.vars = c("gd.Sexual Orientation.LGBTQ+.avg_rating"),
  y.vars = c("ideal.main"),
  weight.vars = c("gd.Sexual Orientation.LGBTQ+.num_reviews"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.gd_lgbtq <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.gd_lgbtq,
  lm.func = lm,
  debug = FALSE,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

## 14.8. Uncertainty in location voteshares ------------------------------------

# `sg.pres.REP` weighted by `sg.n.locs`, then `zi.pres.REP` weighted by
# `zi.n.locs`.
specifs.ideal.uncert.sg_locs <- build_specifs(
  data = d.brands,
  x.vars = c("sg.pres.REP"),
  y.vars = c("ideal.main"),
  weight.vars = c("sg.n.locs"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.sg_locs <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.sg_locs,
  lm.func = lm,
  debug = FALSE,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)
#
specifs.ideal.uncert.zi_locs <- build_specifs(
  data = d.brands,
  x.vars = c("zi.pres.REP"),
  y.vars = c("ideal.main"),
  weight.vars = c("zi.n.locs"),
  scale.y = TRUE,
  scale.x = TRUE
)
results.ideal.uncert.zi_locs <- measure_alignments_robustly(
  data = d.brands,
  lm.specifs = specifs.ideal.uncert.zi_locs,
  lm.func = lm,
  debug = FALSE,
  parallelize = FALSE,
  quad.stats = FALSE,
  equiv.test = FALSE,
  zaminfl = FALSE
)

## Visualise combined ----------------------------------------------------------

# Stack the clustered-SE and weighted results into one long table. Each piece
# is ungrouped, tagged with a human-readable specification label (`specif`)
# and reduced to the same set of columns.
# Each case_when() below has no default branch, so a row whose se.clus / w
# value matches none of the listed conditions gets specif = NA.
d.ideal.uncert <- bind_rows(
  # clustered standard errors (by industry, by parent firm)
  results.ideal.se.clus %>% 
    ungroup() %>%
    mutate(specif = case_when(#se.clus == "" ~ "Main",
                              se.clus == "yougov_brand_category.2" ~ "SE Clustered: Industry",
                              se.clus == "parent" ~ "SE Clustered: Parent Firm")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  # weighted by ideal.main.n
  results.ideal.se.n %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              w == "ideal.main.n" ~ "Weight: # Partisan Phrases")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)  
  ,
  # weighted by ideal.mdl.se.inv
  results.ideal.mdl.se %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              w == "ideal.mdl.se.inv" ~ "Weight: Bootstrapped SE")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)  
  ,
  # weighted by don_doll
  results.ideal.uncert.fec %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              w == "don_doll" ~ "Weight: $ Donated")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)  
  ,
  # weighted by pac_doll
  results.ideal.uncert.pac %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              w == "pac_doll" ~ "Weight: $ Spent")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)  
  ,
  # weighted by legis.n
  results.ideal.uncert.legis %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              w == "legis.n" ~ "Weight: # Legislators Lobbied")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  # weighted by twitter.foll_ideo_accts_n
  results.ideal.uncert.tw %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              w == "twitter.foll_ideo_accts_n" ~ "Weight: # Partisan Accounts")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  # the three Glassdoor results share one label (any weight containing
  # "num_reviews")
  results.ideal.uncert.gd_women %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              grepl("num_reviews", w) ~ "Weight: # Reviews")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  results.ideal.uncert.gd_black %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              grepl("num_reviews", w) ~ "Weight: # Reviews")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  results.ideal.uncert.gd_lgbtq %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              grepl("num_reviews", w) ~ "Weight: # Reviews")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  # the two location results share one label (any weight containing "locs")
  results.ideal.uncert.sg_locs %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              grepl("locs", w) ~ "Weight: # Locations")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
  ,
  results.ideal.uncert.zi_locs %>%
    ungroup() %>%
    mutate(specif = case_when(#w == "" ~ "Main",
                              grepl("locs", w) ~ "Weight: # Locations")) %>%
    select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit)
) #%>% distinct(x, specif, .keep_all = TRUE)

# Append the rows from the earlier results tables (results.ideal.stkhl and
# results.ideal.actv) for the covariates already present in d.ideal.uncert,
# labelled as the "Main" specification.
d.ideal.uncert <- bind_rows(d.ideal.uncert,
                            results.ideal.stkhl %>%
                              filter(x %in% d.ideal.uncert$x) %>%
                              mutate(specif = "Main") %>%
                              select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit),
                            results.ideal.actv %>%
                              filter(x %in% d.ideal.uncert$x) %>%
                              mutate(specif = "Main") %>%
                              select(x, y, specif, estimate, std.error, p.value, p.value.adj.sig, p.value.adj.tcrit))

# Coefficient plot comparing specifications: one facet row per covariate
# (x.name), one y-axis entry per specification label (specif).
p.ideal.uncert <- d.ideal.uncert %>%
  # mutate(x.name = x) %>%
  mutate(x.name = sanitize_var(x, multi_line = F, add_category = T)) %>%
  # mutate(x.name = paste0(x.name, "\n")) %>%
  # black where p.value.adj.sig is TRUE, gray otherwise
  mutate(color = ifelse(p.value.adj.sig, "black", "gray")) %>%
  # point shape 23 for the "Main" specification, 16 for every other one
  mutate(shape = ifelse(specif == "Main", 23, 16)) %>%
  ggplot(aes(y=specif, x=estimate, color=color, shape=shape)) +
  geom_vline(xintercept=0, lty=2) +
  # thin range: estimate +/- p.value.adj.tcrit * std.error
  geom_linerange(aes(xmin=estimate-p.value.adj.tcrit*std.error, xmax=estimate+p.value.adj.tcrit*std.error), 
                 size=0.5, position = position_dodge(width=0.8)) + 
  # thick range: estimate +/- 1.96 * std.error
  geom_linerange(aes(xmin=estimate-1.96*std.error, xmax=estimate+1.96*std.error), 
                 size=1, position = position_dodge(width=0.8)) + 
  geom_point(fill="white", size=3, position = position_dodge(width=0.8)) +
  scale_x_continuous(name = TeX("Coefficient estimate within specification"),
                     limits = c(-1, 1)) +
  scale_y_discrete(name = "Specifications") +
  # geom_text(data = data.frame(x.name = "% R. Legislators Lobbied"),
  #           x=Inf, y=0, size=3, label=TeX("More R. brand signal $\\rightarrow$"),
  #           hjust=1, vjust=-1, inherit.aes=F)  +
  facet_grid(x.name ~ ., scales = "free", space = "free") +
  # color / shape columns hold literal aesthetic values
  scale_color_identity() +
  scale_shape_identity() +
  theme_custom +
  theme(legend.position="none",
        strip.text.y = element_text(angle=0, size=8, vjust=0.5))
# Write the specification-comparison plot to disk.
ggsave_v(p.ideal.uncert, 
         filename = "figures/reg/uncert.pdf",
         width = 10, height = 11)

# 15.) STUDY TV ADS -------------------------------------------------------

## Keyness comparison of TV-ad text between Dem-leaning and Rep-leaning brands.
## Runs only when `dfm_tv_brands_df` is not in the workspace.
## NOTE(review): the guard tests `dfm_tv_brands_df`, but the code below only
## uses `dfm_tv_brands` -- confirm which objects corpspeak_tv_ads.RData holds.
if (!"dfm_tv_brands_df" %in% ls()) {
  load("corpspeak_tv_ads.RData")
  
  ## Attach each brand's ideal point to the TV-ad document variables
  ## (no `by` given, so the join uses all shared column names)
  docvars(dfm_tv_brands) <- docvars(dfm_tv_brands) %>%
    left_join(d.brands %>%
                select(yougov_name, ideal.main))
  ## Strictly negative ideal point => "Dem-leaning"; zero or positive =>
  ## "Rep-leaning"; a missing ideal point stays NA
  docvars(dfm_tv_brands, "ideal.main.lr") <- ifelse(docvars(dfm_tv_brands, "ideal.main") < 0, "Dem-leaning", "Rep-leaning")
  
  ## Collapse documents into one row per leaning
  dfm_tv_brands_ideal <- dfm_group(dfm_tv_brands, groups=ideal.main.lr)

  ## Name the target explicitly. The default (`target = 1L`) picks the first
  ## grouped document, and `textplot_keyness()` applies `color[1]` to the
  ## target, so relying on the default painted "Dem-leaning" red and
  ## "Rep-leaning" blue. With "Rep-leaning" as target, Rep terms are red and
  ## on the positive side, Dem terms blue and on the negative side.
  ## NOTE(review): the axis label mentions "social media" although this block
  ## analyses TV ads -- confirm the intended wording.
  textstat_keyness(dfm_tv_brands_ideal, target = "Rep-leaning") %>%
    # mutate(feature = gsub("_"," ",feature)) %>%
    textplot_keyness(margin = 0.8, color = c("red", "blue"), n = 10) +
    # annotate("text", label=expression(NULL %<-% bold("More Dem-sounding")), x=annot$xpos[2], y=annot$ypos[2], hjust=annot$hjustvar[2], vjust=annot$vjustvar[2], fontface="bold") +
    # annotate("text", label=expression(bold("More Rep-sounding") %->% NULL), x=annot$xpos[3], y=annot$ypos[3], hjust=annot$hjustvar[3], vjust=annot$vjustvar[3], fontface="bold") +
    xlab(TeX("$\\chi^2$ of brand on social media")) +
    theme(legend.position="none")
  
}



